6
|
1 import unittest
|
|
2 import os
|
|
3
|
|
4 from commons.core.parsing.VarscanFileForGnpSNP import VarscanFileForGnpSNP
|
|
5 from commons.core.parsing.VarscanHitForGnpSNP import VarscanHitForGnpSNP
|
|
6
|
|
class Test_VarscanFileForGnpSNP(unittest.TestCase):
    """Unit tests for VarscanFileForGnpSNP: construction, parsing and equality.

    The parsing tests write a small tab-separated Varscan fixture file in the
    current working directory and delete it once parsing has been attempted.
    """

    # Header row written at the top of every Varscan fixture file.
    _VARSCAN_HEADER = "Chrom\tPosition\tRef\tVar\tReads1\tReads2\tVarFreq\tStrands1\tStrands2\tQual1\tQual2\tPvalue\n"

    def test__init__(self):
        """Getters return the constructor arguments, a tab separator and no hits."""
        expFastqFileName = "SR.fastq"
        expRefFastaFileName = "ref.fasta"
        expTaxonName = "Arabidopsis thaliana"
        expVarscanFieldSeparator = "\t"
        expVarscanHitsList = []

        iVarscanFileForGnpSNP = VarscanFileForGnpSNP("", expFastqFileName, expRefFastaFileName, expTaxonName)

        obsFastqFileName = iVarscanFileForGnpSNP.getFastqFileName()
        obsRefFastaFileName = iVarscanFileForGnpSNP.getRefFastaFileName()
        obsTaxonName = iVarscanFileForGnpSNP.getTaxonName()
        obsVarscanFieldSeparator = iVarscanFileForGnpSNP.getVarscanFieldSeparator()
        obsVarscanHitsList = iVarscanFileForGnpSNP.getVarscanHitsList()

        self.assertEqual(expFastqFileName, obsFastqFileName)
        self.assertEqual(expRefFastaFileName, obsRefFastaFileName)
        self.assertEqual(expTaxonName, obsTaxonName)
        self.assertEqual(expVarscanFieldSeparator, obsVarscanFieldSeparator)
        self.assertEqual(expVarscanHitsList, obsVarscanHitsList)

    def test_parse(self):
        """Parsing two SNP rows yields the two expected, fully populated hits."""
        varscanFileName = "varscan.tab"
        self._writeVarscanFile(varscanFileName)

        expVarscanHitsList = [
            self._makeSnpHit('32', 'C', 'T', '37', '35'),
            self._makeSnpHit('34', 'A', 'T', '40', '34'),
        ]

        obsVarscanHitsList = self._parseAndRemove(varscanFileName, '', '', '')

        self.assertEqual(expVarscanHitsList, obsVarscanHitsList)

    def test_parse_with_same_position_and_chr_and_type(self):
        """The second of two substitution rows at one chrom/position has occurrence 2."""
        varscanFileName = "varscan.tab"
        self._writeVarscanFile_2(varscanFileName)

        expVarscanHitsOccurence = 2

        obsVarscanHitsList = self._parseAndRemove(varscanFileName, '', '', '')
        obsVarscanHitsOccurence = obsVarscanHitsList[1].getOccurrence()

        self.assertEqual(expVarscanHitsOccurence, obsVarscanHitsOccurence)

    def test_parse_with_same_position_and_chr_and_different_type(self):
        """A "+A" row following a substitution at the same chrom/position keeps occurrence 1."""
        varscanFileName = "varscan.tab"
        self._writeVarscanFile_3(varscanFileName)

        expVarscanHitsOccurence = 1

        obsVarscanHitsList = self._parseAndRemove(varscanFileName)
        obsVarscanHitsOccurence = obsVarscanHitsList[1].getOccurrence()

        self.assertEqual(expVarscanHitsOccurence, obsVarscanHitsOccurence)

    def test_parse_on_occurence(self):
        """Occurrence numbers of six parsed rows spread over two sequence names."""
        varscanFileName = "varscan.tab"
        self._writeVarscanFile_4(varscanFileName)

        # One expected value per data row of the fixture, in file order.
        expOccurrencesList = [1, 1, 2, 1, 1, 2]

        obsVarscanHitsList = self._parseAndRemove(varscanFileName)
        obsOccurrencesList = [hit.getOccurrence() for hit in obsVarscanHitsList]

        # Comparing whole lists also checks that exactly six hits were parsed.
        self.assertEqual(expOccurrencesList, obsOccurrencesList)

    def test__eq__notEqual(self):
        """Instances differing only by their FASTQ file name are not equal."""
        refFastaFileName = "ref.fasta"
        taxonName = "Arabidopsis thaliana"

        iVarscanFileForGnpSNP1 = VarscanFileForGnpSNP("", "SR.fastq", refFastaFileName, taxonName)
        iVarscanFileForGnpSNP2 = VarscanFileForGnpSNP("", "SR.fastq2", refFastaFileName, taxonName)

        # ``==`` is used on purpose (not assertNotEqual) so that __eq__ is the method exercised.
        self.assertFalse(iVarscanFileForGnpSNP1 == iVarscanFileForGnpSNP2)

    def test__eq__equal(self):
        """Instances built from the same arguments and equivalent hit lists are equal."""
        fastqFileName = "SR.fastq"
        refFastaFileName = "ref.fasta"
        taxonName = "Arabidopsis thaliana"

        iVarscanFileForGnpSNP1 = VarscanFileForGnpSNP("", fastqFileName, refFastaFileName, taxonName)
        iVarscanFileForGnpSNP1.setVarscanHitsList([self._makeRawHit('34', 'A', 'T', '40', '34')])

        iVarscanFileForGnpSNP2 = VarscanFileForGnpSNP("", fastqFileName, refFastaFileName, taxonName)
        iVarscanFileForGnpSNP2.setVarscanHitsList([self._makeRawHit('34', 'A', 'T', '40', '34')])

        self.assertTrue(iVarscanFileForGnpSNP1 == iVarscanFileForGnpSNP2)

    def _makeRawHit(self, position, ref, var, qual1, qual2):
        """Return a hit with only the twelve raw Varscan columns set.

        Columns that never vary across these tests (chrom, reads, frequency,
        strands, p-value) are fixed here; the rest come from the arguments.
        """
        varscanHit = VarscanHitForGnpSNP()
        varscanHit.setChrom('C02HBa0291P19_LR48')
        varscanHit.setPosition(position)
        varscanHit.setRef(ref)
        varscanHit.setVar(var)
        varscanHit.setReads1('1')
        varscanHit.setReads2('2')
        varscanHit.setVarFreq('66,67%')
        varscanHit.setStrands1('1')
        varscanHit.setStrands2('1')
        varscanHit.setQual1(qual1)
        varscanHit.setQual2(qual2)
        varscanHit.setPvalue('0.3999999999999999')
        return varscanHit

    def _makeSnpHit(self, position, ref, var, qual1, qual2):
        """Return a raw hit completed with the GnpSNP fields of a first-occurrence SNP."""
        varscanHit = self._makeRawHit(position, ref, var, qual1, qual2)
        varscanHit.setGnpSNPRef(ref)
        varscanHit.setGnpSNPVar(var)
        # The raw position is kept as text, the GnpSNP position is an integer.
        varscanHit.setGnpSNPPosition(int(position))
        varscanHit.setOccurrence(1)
        varscanHit.setPolymType("SNP")
        varscanHit.setPolymLength(1)
        return varscanHit

    def _parseAndRemove(self, varscanFileName, *extraCtorArgs):
        """Parse the fixture file and return its hits list, always deleting the file.

        extraCtorArgs are forwarded after the file name to the
        VarscanFileForGnpSNP constructor, so each test keeps its own call form.
        """
        try:
            iVarscanFileForGnpSNP = VarscanFileForGnpSNP(varscanFileName, *extraCtorArgs)
            iVarscanFileForGnpSNP.parse()
            return iVarscanFileForGnpSNP.getVarscanHitsList()
        finally:
            # Clean up even when parsing fails, so no fixture leaks into later tests.
            os.remove(varscanFileName)

    def _writeVarscanRows(self, varscanFileName, dataRowsList):
        """Write the header followed by the given data rows to varscanFileName."""
        with open(varscanFileName, 'w') as varscanFile:
            varscanFile.write(self._VARSCAN_HEADER)
            varscanFile.writelines(dataRowsList)

    def _writeVarscanFile(self, varscanFileName):
        """Fixture: two substitution rows at positions 32 and 34 of one sequence."""
        self._writeVarscanRows(varscanFileName, [
            "C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "C02HBa0291P19_LR48\t34\tA\tT\t1\t2\t66,67%\t1\t1\t40\t34\t0.3999999999999999\n",
        ])

    def _writeVarscanFile_2(self, varscanFileName):
        """Fixture: two substitution rows sharing sequence name and position 32."""
        self._writeVarscanRows(varscanFileName, [
            "C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "C02HBa0291P19_LR48\t32\tA\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
        ])

    def _writeVarscanFile_3(self, varscanFileName):
        """Fixture: a substitution row and a "+A" row at the same sequence name and position."""
        self._writeVarscanRows(varscanFileName, [
            "C02HBa0291P19_LR48\t32\tC\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "C02HBa0291P19_LR48\t32\tC\t+A\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
        ])

    def _writeVarscanFile_4(self, varscanFileName):
        """Fixture: six rows over "seqname" and "chrom", with position 4 repeated in each."""
        self._writeVarscanRows(varscanFileName, [
            "seqname\t2\tA\tT\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "seqname\t4\tC\tG\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "seqname\t4\tC\tA\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "seqname\t8\tT\tA\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "chrom\t4\tC\tG\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
            "chrom\t4\tC\tA\t1\t2\t66,67%\t1\t1\t37\t35\t0.3999999999999999\n",
        ])
|
|
# Run the whole test suite when this file is executed directly.
if __name__ == "__main__":
    unittest.main()