6
|
1 import os
|
|
2 import shutil
|
|
3 import unittest
|
|
4 from commons.core.utils.FileUtils import FileUtils
|
|
5 from commons.core.parsing.Multifasta2SNPFile import Multifasta2SNPFile
|
|
6 from commons.core.parsing.Multifasta2SNPFile import ReferenceBioseqAndLinesBioseqDBWrapper
|
|
7 from commons.core.seq.Bioseq import Bioseq
|
|
8 from commons.core.seq.BioseqDB import BioseqDB
|
|
9 from smac_pipe.tests.Utils4Test import Utils4Test
|
|
10
|
|
11
|
|
12 class Test_Multifasta2SNPFile(unittest.TestCase):
|
|
13 # TODO TEST LOGFILE
|
|
def setUp(self):
    # Move into the parsing test directory and define every file name the
    # tests use. Requires the REPET_PATH environment variable (a missing
    # variable raises KeyError on the first lookup).
    repetPath = os.environ["REPET_PATH"]
    os.chdir("%s/commons/core/parsing/test/" % repetPath)

    # Input multifasta file, relative to the test directory.
    self._inFileName = "multifasta_input.fasta"

    # Expected files, addressed by absolute path.
    self._expSubSNPFileName = "%s/commons/core/parsing/test/expSubSNP.csv" % repetPath
    self._expAlleleFileName = "%s/commons/core/parsing/test/expAllele.csv" % repetPath
    self._expIndividualFileName = "%s/commons/core/parsing/test/expIndividual.csv" % repetPath
    self._expSequenceFSAFileName = "%s/commons/core/parsing/test/expSequences.fsa" % repetPath
    self._expSequenceCSVFileName = "%s/commons/core/parsing/test/expSequences.csv" % repetPath
    self._expBatchFileName = "%s/commons/core/parsing/test/expBatch.txt" % repetPath
    self._expBatchLineFileName = "%s/commons/core/parsing/test/expBatchLine.csv" % repetPath

    # Input and expected files of the "real input file" scenario, relative
    # to the test directory.
    self._realInputFileName = "data/real_multifasta_input.fasta"
    self._realExpSubSNPFileName = "data/realExpSubSNP.csv"
    self._realExpSequenceFSAFileName = "data/realExpSequences.fsa"
    self._realExpBatchLineFileName = "data/realExpBatchLine.csv"
    self._realExpIndividualFileName = "data/realExpIndividual.csv"

    # Directory used by the runSeveralBatches tests (removed in tearDown).
    self._inputDirSeveralBatches = "%s/commons/core/parsing/test/severalBatchDir" % repetPath

    # Observed (produced) file names, relative to the working directory.
    self._obsSubSNPFileName = "SubSNP.csv"
    self._obsAlleleFileName = "Allele.csv"
    self._obsIndividualFileName = "Individual.csv"
    self._obsSequenceFSAFileName = "Sequences.fsa"
    self._obsSequenceCSVFileName = "Sequences.csv"
    self._obsBatchFileName = "Batch.txt"
    self._obsBatchLineFileName = "BatchLine.csv"

    # Used by tearDown and some tests to check whether a file exists.
    self._fileUtils = FileUtils()
|
|
44
|
|
def tearDown(self):
    # Return to the test directory and delete every input, observed,
    # expected and log file that is present, then the several-batches
    # directory if it exists.
    os.chdir("%s/commons/core/parsing/test/" % os.environ["REPET_PATH"])
    logFileName = "multifasta2SNP.log"

    # Same files, in the same order, as the original one-by-one checks.
    lFileNamesToRemove = [
        self._inFileName,
        self._obsSubSNPFileName,
        self._obsSubSNPFileName + "_filtered",
        self._obsAlleleFileName,
        self._obsIndividualFileName,
        self._obsSequenceFSAFileName,
        self._obsSequenceCSVFileName,
        self._obsBatchFileName,
        self._obsBatchLineFileName,
        self._expSubSNPFileName,
        self._realExpSubSNPFileName + "_filtered",
        self._expAlleleFileName,
        self._expIndividualFileName,
        self._expSequenceFSAFileName,
        self._expSequenceCSVFileName,
        self._expBatchFileName,
        self._expBatchLineFileName,
        logFileName,
    ]
    for fileName in lFileNamesToRemove:
        if self._fileUtils.isRessourceExists(fileName):
            os.remove(fileName)

    # The several-batches input is a directory tree, so it needs rmtree.
    if self._fileUtils.isRessourceExists(self._inputDirSeveralBatches):
        shutil.rmtree(self._inputDirSeveralBatches)
|
|
88
|
|
89
|
|
def test_runOneBatch(self):
    # Run one batch on the generated input file and compare each produced
    # file with its expected counterpart.
    self._writeInputFile()
    self._writeExpSubSNPFile()
    self._writeExpAlleleFile()
    self._writeExpIndividualFile()
    self._writeExpSequenceFile()
    self._writeExpBatchFile()
    self._writeExpBatchLineFile()

    converter = Multifasta2SNPFile("Arabidopsis thaliana", "Batch1", "methyltransferase")
    converter.runOneBatch(self._inFileName)

    # (expected, observed) pairs, checked in this order.
    lExpAndObsFileNames = [
        (self._expAlleleFileName, self._obsAlleleFileName),
        (self._expIndividualFileName, self._obsIndividualFileName),
        (self._expSequenceFSAFileName, self._obsSequenceFSAFileName),
        (self._expSequenceCSVFileName, self._obsSequenceCSVFileName),
        (self._expBatchFileName, self._obsBatchFileName),
        (self._expBatchLineFileName, self._obsBatchLineFileName),
        (self._expSubSNPFileName, self._obsSubSNPFileName),
    ]
    for expFileName, obsFileName in lExpAndObsFileNames:
        self.assertTrue(FileUtils.isRessourceExists(obsFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
|
|
121
|
|
def test_runOneBatch_with_a_real_input_file(self):
    # Run one batch on the stored real multifasta file. Part of the
    # expected files are stored under data/, the others are written by the
    # _writeRealExp* helpers.
    self._writeRealExpAlleleFile()
    self._writeRealExpSequenceCSVFile()
    self._writeRealExpBatchFile()

    converter = Multifasta2SNPFile("Pinus pinaster", "INRA_Pinus_pinaster_HDZ31-1", "PpHDZ31")
    converter.runOneBatch(self._realInputFileName)

    # (expected, observed) pairs, checked in this order.
    lExpAndObsFileNames = [
        (self._realExpIndividualFileName, self._obsIndividualFileName),
        (self._realExpSequenceFSAFileName, self._obsSequenceFSAFileName),
        (self._expSequenceCSVFileName, self._obsSequenceCSVFileName),
        (self._expBatchFileName, self._obsBatchFileName),
        (self._realExpBatchLineFileName, self._obsBatchLineFileName),
        (self._expAlleleFileName, self._obsAlleleFileName),
        (self._realExpSubSNPFileName, self._obsSubSNPFileName),
    ]
    for expFileName, obsFileName in lExpAndObsFileNames:
        self.assertTrue(FileUtils.isRessourceExists(obsFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
|
|
150
|
|
def test_runOneBatch_with_errors_in_refSeq(self):
    # runOneBatch must raise when the reference sequence of the input file
    # contains errors.
    self._writeInputFileWithSeqErrorsInRefSeq()
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "Batch1", "methyltransferase")
    # NOTE(review): every other test calls runOneBatch with a single
    # argument; if it does not accept a second one, the TypeError alone
    # satisfies this assertion -- confirm the signature.
    self.assertRaises(
        Exception,
        converter.runOneBatch,
        self._inFileName,
        self._obsSubSNPFileName)
|
|
155
|
|
def test_runOneBatch_with_errors_in_lineSeq(self):
    # runOneBatch must raise when one line sequence of the input file
    # contains errors.
    self._writeInputFileWithSeqErrorsInOneLineSeq()
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "Batch1", "methyltransferase")
    # NOTE(review): every other test calls runOneBatch with a single
    # argument; if it does not accept a second one, the TypeError alone
    # satisfies this assertion -- confirm the signature.
    self.assertRaises(
        Exception,
        converter.runOneBatch,
        self._inFileName,
        self._obsSubSNPFileName)
|
|
160
|
|
def test_runOneBatch_with_a_several_lineSeq(self):
    # Run one batch on the "several line sequences" input file and compare
    # each produced file with its expected counterpart.
    self._writeInputFileWithASeveralLineSeq()
    self._writeExpSubSNPFileSeveralLineSeq()
    self._writeExpAlleleFile()
    self._writeExpIndividualFile()
    self._writeExpSequenceFileSeveralLineSeq()
    self._writeExpBatchFile()
    self._writeExpBatchLineFile()

    converter = Multifasta2SNPFile("Arabidopsis thaliana", "Batch1", "methyltransferase")
    converter.runOneBatch(self._inFileName)

    # (expected, observed) pairs, checked in this order.
    lExpAndObsFileNames = [
        (self._expSubSNPFileName, self._obsSubSNPFileName),
        (self._expAlleleFileName, self._obsAlleleFileName),
        (self._expIndividualFileName, self._obsIndividualFileName),
        (self._expSequenceFSAFileName, self._obsSequenceFSAFileName),
        (self._expSequenceCSVFileName, self._obsSequenceCSVFileName),
        (self._expBatchFileName, self._obsBatchFileName),
        (self._expBatchLineFileName, self._obsBatchLineFileName),
    ]
    for expFileName, obsFileName in lExpAndObsFileNames:
        self.assertTrue(FileUtils.isRessourceExists(obsFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
|
|
193
|
|
def test_runOneBatch_with_2_seqs_with_the_same_name(self):
    # runOneBatch is expected to end with SystemExit when the input file
    # holds two sequences with the same name.
    self._writeInputFileWith2SeqsWithTheSameName()
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")

    # Fails when no SystemExit is raised; any other exception propagates
    # as a test error.
    self.assertRaises(SystemExit, converter.runOneBatch, self._inFileName)
|
|
208
|
|
def test_runOneBatch_with_indels_and_snps(self):
    # Run one batch on an input file mixing SNPs and indels and compare
    # each produced file with its expected counterpart.
    self._writeInputFileWithSnpsAndIndels()
    self._writeExpSubSNPFileWithSnpsAndIndels()
    self._writeExpAlleleFileWithSnpsAndIndels()
    self._writeExpIndividualFile()
    self._writeExpSequenceFileWithDeletion()
    self._writeExpBatchFile()
    self._writeExpBatchLineFile()

    converter = Multifasta2SNPFile("Arabidopsis thaliana", "Batch1", "methyltransferase")
    converter.runOneBatch(self._inFileName)

    # (expected, observed) pairs, checked in this order.
    lExpAndObsFileNames = [
        (self._expIndividualFileName, self._obsIndividualFileName),
        (self._expSequenceFSAFileName, self._obsSequenceFSAFileName),
        (self._expSequenceCSVFileName, self._obsSequenceCSVFileName),
        (self._expBatchFileName, self._obsBatchFileName),
        (self._expBatchLineFileName, self._obsBatchLineFileName),
        (self._expAlleleFileName, self._obsAlleleFileName),
        (self._expSubSNPFileName, self._obsSubSNPFileName),
    ]
    for expFileName, obsFileName in lExpAndObsFileNames:
        self.assertTrue(FileUtils.isRessourceExists(obsFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
|
|
244
|
|
def test_runOneBatchWithPotentialDooblons(self):
    # Run one batch on an input with potential duplicates ("dooblons") and
    # compare the observed SubSNP file with data/ExpPotDooblonsSubSNP.csv
    # once three columns have been stripped from both files.
    #
    # The intermediate "_filtered*" files are removed in a finally clause,
    # so a failing assertion no longer leaves them behind in data/ and in
    # the working directory.
    self._writeInputFileBatchWithPotentialDooblons()

    batchName = "Batch_AU247387"
    taxon = "Arabidopsis thaliana"
    gene = "methyltransferase"
    multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene)
    multifasta2SNPFile.runOneBatch(self._inFileName)
    self.assertTrue(FileUtils.isRessourceExists(self._obsSubSNPFileName))

    expSubSNPFile = "data/ExpPotDooblonsSubSNP.csv"
    lSuffixes = ["_filtered", "_filtered_filtered", "_filtered_filtered_filtered"]
    comparableExpSubSNPFile = expSubSNPFile + lSuffixes[-1]
    comparableObsSubSNPFile = self._obsSubSNPFileName + lSuffixes[-1]

    try:
        # Each call presumably writes "<input>_filtered" (the chained file
        # names below rely on it), hence the accumulating suffixes.
        Utils4Test.removeOneSpecifiedColumn(expSubSNPFile, ";", 8)
        Utils4Test.removeOneSpecifiedColumn(self._obsSubSNPFileName, ";", 8)

        Utils4Test.removeOneSpecifiedColumn(expSubSNPFile + "_filtered", ";", 9)
        Utils4Test.removeOneSpecifiedColumn(self._obsSubSNPFileName + "_filtered", ";", 9)

        Utils4Test.removeOneSpecifiedColumn(expSubSNPFile + "_filtered_filtered", ";", 13)
        Utils4Test.removeOneSpecifiedColumn(self._obsSubSNPFileName + "_filtered_filtered", ";", 13)

        self.assertTrue(FileUtils.isRessourceExists(comparableExpSubSNPFile))
        self.assertTrue(FileUtils.isRessourceExists(comparableObsSubSNPFile))
        self.assertTrue(FileUtils.are2FilesIdentical(comparableExpSubSNPFile, comparableObsSubSNPFile))
    finally:
        # Remove every intermediate file, whatever the outcome above.
        for suffix in lSuffixes:
            for baseFileName in (self._obsSubSNPFileName, expSubSNPFile):
                intermediateFileName = baseFileName + suffix
                if self._fileUtils.isRessourceExists(intermediateFileName):
                    os.remove(intermediateFileName)
|
|
287
|
|
def test_runSeveralBatches(self):
    # Run several batches from the input directory and compare each file
    # produced inside that directory with its expected counterpart.
    self._writeInputFileSeveralBatches()
    self._writeExpSubSNPFileSeveralBatches()
    self._writeExpAlleleFileSeveralBatches()
    self._writeExpIndividualFile()
    self._writeExpSequenceSeveralBatches()
    self._writeExpBatchFileSeveralBatches()
    self._writeExpBatchLineFileSeveralBatches()

    converter = Multifasta2SNPFile("Arabidopsis thaliana")
    converter.runSeveralBatches(self._inputDirSeveralBatches)

    # (expected path, observed base name) pairs, checked in this order.
    lExpAndObsFileNames = [
        (self._expAlleleFileName, self._obsAlleleFileName),
        (self._expIndividualFileName, self._obsIndividualFileName),
        (self._expSequenceFSAFileName, self._obsSequenceFSAFileName),
        (self._expSequenceCSVFileName, self._obsSequenceCSVFileName),
        (self._expBatchFileName, self._obsBatchFileName),
        (self._expBatchLineFileName, self._obsBatchLineFileName),
        (self._expSubSNPFileName, self._obsSubSNPFileName),
    ]
    for expFileName, obsBaseName in lExpAndObsFileNames:
        # Observed files are looked up inside the several-batches directory.
        obsFileName = self._inputDirSeveralBatches + "/" + obsBaseName
        self.assertTrue(FileUtils.isRessourceExists(obsFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
|
|
319
|
|
def test_runSeveralBatches_different_lines_between_files(self):
    # Same as test_runSeveralBatches, with input and expected files written
    # by the "different_lines_between_files" helpers.
    self._writeInputFileSeveralBatches_different_lines_between_files()
    self._writeExpSubSNPFileSeveralBatches_different_lines_between_files()
    self._writeExpAlleleFileSeveralBatches()
    self._writeExpIndividualFile_different_lines_between_files()
    self._writeExpSequenceSeveralBatches()
    self._writeExpBatchFileSeveralBatches()
    self._writeExpBatchLineFileSeveralBatches_different_lines_between_files()

    converter = Multifasta2SNPFile("Arabidopsis thaliana")
    converter.runSeveralBatches(self._inputDirSeveralBatches)

    # (expected path, observed base name) pairs, checked in this order.
    lExpAndObsFileNames = [
        (self._expAlleleFileName, self._obsAlleleFileName),
        (self._expIndividualFileName, self._obsIndividualFileName),
        (self._expSequenceFSAFileName, self._obsSequenceFSAFileName),
        (self._expSequenceCSVFileName, self._obsSequenceCSVFileName),
        (self._expBatchFileName, self._obsBatchFileName),
        (self._expBatchLineFileName, self._obsBatchLineFileName),
        (self._expSubSNPFileName, self._obsSubSNPFileName),
    ]
    for expFileName, obsBaseName in lExpAndObsFileNames:
        # Observed files are looked up inside the several-batches directory.
        obsFileName = self._inputDirSeveralBatches + "/" + obsBaseName
        self.assertTrue(FileUtils.isRessourceExists(obsFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
|
|
351
|
|
def test_runSeveralBatches_different_lines_and_same_refseq_between_files(self):
    # runSeveralBatches is expected to raise when the batch files share the
    # same reference sequence.
    #
    # The previous form wrapped the call in try/except and then called
    # assertRaises(Exception, e) on the caught instance: that check passed
    # whether or not an exception was raised, and the "except Exception, e"
    # spelling is a syntax error on Python 3. The callable form of
    # assertRaises below fails when nothing is raised.
    # NOTE(review): the expected files are written but never compared;
    # confirm that raising is the intended behaviour for this scenario.
    self._writeInputFileSeveralBatches_different_lines_and_same_refseq_between_files()
    self._writeExpSubSNPFileSeveralBatches_different_lines_between_files()
    self._writeExpAlleleFileSeveralBatches()
    self._writeExpIndividualFile_different_lines_between_files()
    self._writeExpSequenceSeveralBatchesForSameRefSeq()
    self._writeExpBatchFileSeveralBatchesForSameRefSeq()
    self._writeExpBatchLineFileSeveralBatches_different_lines_between_files()

    multifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana")
    self.assertRaises(Exception, multifasta2SNPFile.runSeveralBatches, self._inputDirSeveralBatches)
|
|
366
|
|
def test_detectSNPAndIndels(self):
    # Two aligned lines that differ from the reference by substitutions
    # only: check the sub-SNP, allele and individual results.
    reference = Bioseq()
    alignedLines = BioseqDB()
    batchName = "batch1"
    converter = Multifasta2SNPFile("Arabidopsis thaliana", batchName, "methyltransferase")
    reference.sequence = "ATTCGCGTATGCGTATGCTT"
    reference.header = "reference"

    alignedLines.setData([
        Bioseq("line1", "ATCCGCGTATGCGTATGATT"),
        Bioseq("line2", "ATTCGTGTATGCGTATGGTT"),
    ])

    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, alignedLines, converter._logFile, self._inFileName)
    converter._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"}
    converter.detectSNPsAndIndels(converter._wrapper)

    def expSubSNP(nameSuffix, position, lineNumber, allele, flank5, flank3):
        # Build one expected sub-SNP record; every record here is a SNP of length 1.
        return {'subSNPName': batchName + nameSuffix, 'position': position, 'lineName': lineNumber,
                'allele': allele, '5flank': flank5, '3flank': flank3, 'batchNumber': 1,
                'confidenceValue': "A", 'type': "SNP", 'length': 1}

    dExpAllele = {'C': 1, 'A': 2, 'T': 3, 'G': 4}
    lExpSNP = [
        expSubSNP("_SNP_3_line1", 3, 1, 1, "AT", "CGCGTATGCGTATGATT"),
        expSubSNP("_SNP_3_line2", 3, 2, 3, "AT", "CGTGTATGCGTATGGTT"),
        expSubSNP("_SNP_6_line2", 6, 2, 3, "ATTCG", "GTATGCGTATGGTT"),
        expSubSNP("_SNP_6_line1", 6, 1, 1, "ATCCG", "GTATGCGTATGATT"),
        expSubSNP("_SNP_18_line1", 18, 1, 2, "ATCCGCGTATGCGTATG", "TT"),
        expSubSNP("_SNP_18_line2", 18, 2, 4, "ATTCGTGTATGCGTATG", "TT"),
    ]
    lExpIndividual = [
        {'individualNumber': 1, 'individualName': "line1", 'scientificName': "Arabidopsis thaliana"},
        {'individualNumber': 2, 'individualName': "line2", 'scientificName': "Arabidopsis thaliana"},
    ]

    # The expected records are sorted with the converter's own sort before comparison.
    lSortedExpSNP = converter._sortSubSNPResultByBatchPositionAndLineName(lExpSNP)
    self.assertEqual(lSortedExpSNP, converter._lSubSNPFileResults)
    self.assertEqual(dExpAllele, converter._dAlleleFileResults)
    self.assertEqual(lExpIndividual, converter._lIndividualFileResults)
|
|
399
|
|
def test_detectSNPAndIndels_no_polym(self):
    # Both aligned lines are identical to the reference: no sub-SNP result
    # is expected.
    reference = Bioseq()
    alignedLines = BioseqDB()
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    reference.sequence = "ATTCGCGTATGCGTATGCTT"
    reference.header = "reference"

    alignedLines.setData([
        Bioseq("line1", "ATTCGCGTATGCGTATGCTT"),
        Bioseq("line2", "ATTCGCGTATGCGTATGCTT"),
    ])

    wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, alignedLines, converter._logFile, self._inFileName)
    converter.detectSNPsAndIndels(wrapper)

    self.assertEqual([], converter._lSubSNPFileResults)
|
|
422
|
|
def test_detectSNPAndIndels_with_only_dels(self):
    # Two aligned lines carrying gaps at the start of the reference: check
    # the deletion records, the alleles and the individuals.
    reference = Bioseq()
    alignedLines = BioseqDB()
    batchName = "batch1"
    converter = Multifasta2SNPFile("Arabidopsis thaliana", batchName, "methyltransferase")
    reference.sequence = "ATTACCGAA"
    reference.header = "reference"

    alignedLines.setData([
        Bioseq("line1", "A--ACCGAA"),
        Bioseq("line2", "---ACCGAA"),
    ])

    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, alignedLines, converter._logFile, self._inFileName)
    converter._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"}
    converter.detectSNPsAndIndels(converter._wrapper)

    def expDeletion(nameSuffix, lineNumber, allele):
        # Build one expected record; both are deletions of length 3 at position 1.
        return {'subSNPName': batchName + nameSuffix, 'position': 1, 'lineName': lineNumber,
                'allele': allele, '5flank': "", '3flank': "ACCGAA", 'batchNumber': 1,
                'confidenceValue': "A", 'type': "DELETION", 'length': 3}

    dExpAllele = {'A--': 1, '---': 2}
    lExpSNP = [
        expDeletion("_DEL_1_line2", 2, 2),
        expDeletion("_DEL_1_line1", 1, 1),
    ]
    lExpIndividual = [
        {'individualNumber': 1, 'individualName': "line1", 'scientificName': "Arabidopsis thaliana"},
        {'individualNumber': 2, 'individualName': "line2", 'scientificName': "Arabidopsis thaliana"},
    ]

    self.assertEqual(dExpAllele, converter._dAlleleFileResults)
    # The expected records are sorted with the converter's own sort before comparison.
    lSortedExpSNP = converter._sortSubSNPResultByBatchPositionAndLineName(lExpSNP)
    self.assertEqual(lSortedExpSNP, converter._lSubSNPFileResults)
    self.assertEqual(lExpIndividual, converter._lIndividualFileResults)
|
|
451
|
|
def test_detectSNPAndIndels_with_dels_and_snps(self):
    # Four aligned lines mixing leading gaps and one substitution at
    # position 8: check the deletion and SNP records, the alleles and the
    # individuals.
    reference = Bioseq()
    alignedLines = BioseqDB()
    batchName = "batch1"
    converter = Multifasta2SNPFile("Arabidopsis thaliana", batchName, "methyltransferase")
    reference.sequence = "ATTACCGAA"
    reference.header = "reference"

    alignedLines.setData([
        Bioseq("line1", "A--ACCGAA"),
        Bioseq("line2", "---ACCGAA"),
        Bioseq("line3", "ATTACCGGA"),
        Bioseq("line4", "----CCGAA"),
    ])

    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, alignedLines, converter._logFile, self._inFileName)
    converter._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"}
    converter.detectSNPsAndIndels(converter._wrapper)

    def expRecord(nameSuffix, position, lineNumber, allele, flank5, flank3, polymType, length):
        # Build one expected sub-SNP record.
        return {'subSNPName': batchName + nameSuffix, 'position': position, 'lineName': lineNumber,
                'allele': allele, '5flank': flank5, '3flank': flank3, 'batchNumber': 1,
                'confidenceValue': "A", 'type': polymType, 'length': length}

    dExpAllele = {'G': 1, 'A--A': 2, '---A': 3, '----': 4, 'ATTA': 5, 'A': 6}
    lExpSNP = [
        expRecord("_DEL_1_line2", 1, 2, 3, "", "CCGAA", "DELETION", 4),
        expRecord("_DEL_1_line1", 1, 1, 2, "", "CCGAA", "DELETION", 4),
        expRecord("_SNP_8_line3", 8, 3, 1, "ATTACCG", "A", "SNP", 1),
        expRecord("_SNP_8_line1", 8, 1, 6, "A--ACCG", "A", "SNP", 1),
        expRecord("_SNP_8_line2", 8, 2, 6, "---ACCG", "A", "SNP", 1),
        expRecord("_SNP_8_line4", 8, 4, 6, "----CCG", "A", "SNP", 1),
        expRecord("_DEL_1_line4", 1, 4, 4, "", "CCGAA", "DELETION", 4),
        expRecord("_DEL_1_line3", 1, 3, 5, "", "CCGGA", "DELETION", 4),
    ]
    lExpIndividual = [
        {'individualNumber': 1, 'individualName': "line1", 'scientificName': "Arabidopsis thaliana"},
        {'individualNumber': 2, 'individualName': "line2", 'scientificName': "Arabidopsis thaliana"},
        {'individualNumber': 3, 'individualName': "line3", 'scientificName': "Arabidopsis thaliana"},
        {'individualNumber': 4, 'individualName': "line4", 'scientificName': "Arabidopsis thaliana"},
    ]

    self.assertEqual(dExpAllele, converter._dAlleleFileResults)
    # The expected records are sorted with the converter's own sort before comparison.
    lSortedExpSNP = converter._sortSubSNPResultByBatchPositionAndLineName(lExpSNP)
    self.assertEqual(lSortedExpSNP, converter._lSubSNPFileResults)
    self.assertEqual(lExpIndividual, converter._lIndividualFileResults)
|
|
490
|
|
def test_detectSNPAndIndels_with_only_inserts(self):
    # The reference carries a two-column gap that two of the three aligned
    # lines fill: check the insertion records, the alleles and the
    # individuals.
    reference = Bioseq()
    alignedLines = BioseqDB()
    batchName = "batch1"
    converter = Multifasta2SNPFile("Arabidopsis thaliana", batchName, "methyltransferase")
    reference.sequence = "A--ACCGAA"
    reference.header = "reference"

    alignedLines.setData([
        Bioseq("line1", "A--ACCGAA"),
        Bioseq("line2", "AG-ACCGAA"),
        Bioseq("line3", "ATTACCGAA"),
    ])

    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, alignedLines, converter._logFile, self._inFileName)
    converter._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"}
    converter.detectSNPsAndIndels(converter._wrapper)

    def expInsertion(nameSuffix, lineNumber, allele):
        # Build one expected record; all are insertions of length 2 at position 1.
        return {'subSNPName': batchName + nameSuffix, 'position': 1, 'lineName': lineNumber,
                'allele': allele, '5flank': "A", '3flank': "ACCGAA", 'batchNumber': 1,
                'confidenceValue': "A", 'type': "INSERTION", 'length': 2}

    dExpAllele = {'G-': 1, 'TT': 2, '--': 3}
    lExpSNP = [
        expInsertion("_INS_1_line2", 2, 1),
        expInsertion("_INS_1_line3", 3, 2),
        expInsertion("_INS_1_line1", 1, 3),
    ]
    lExpIndividual = [
        {'individualNumber': 1, 'individualName': "line1", 'scientificName': "Arabidopsis thaliana"},
        {'individualNumber': 2, 'individualName': "line2", 'scientificName': "Arabidopsis thaliana"},
        {'individualNumber': 3, 'individualName': "line3", 'scientificName': "Arabidopsis thaliana"},
    ]

    self.assertEqual(dExpAllele, converter._dAlleleFileResults)
    # The expected records are sorted with the converter's own sort before comparison.
    lSortedExpSNP = converter._sortSubSNPResultByBatchPositionAndLineName(lExpSNP)
    self.assertEqual(lSortedExpSNP, converter._lSubSNPFileResults)
    self.assertEqual(lExpIndividual, converter._lIndividualFileResults)
|
|
522
|
|
def test_detectSNPAndIndels_with_snps_and_inserts(self):
    # line2 and line3 fill the reference gap and line3 additionally carries a
    # C where the reference has an A: SNP and INSERTION entries are expected.
    batchName = "batch1"
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", batchName, "methyltransferase")

    iRefBioseq = Bioseq()
    iRefBioseq.header = "reference"
    iRefBioseq.sequence = "A--ACCGAA"

    iLinesBioseqDB = BioseqDB()
    iLinesBioseqDB.setData([Bioseq("line1", "A--ACCGAA"),
                            Bioseq("line2", "AG-ACCGAA"),
                            Bioseq("line3", "ATTACCGCA")])

    iMultifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(iRefBioseq, iLinesBioseqDB, iMultifasta2SNPFile._logFile, self._inFileName)
    iMultifasta2SNPFile._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"}
    iMultifasta2SNPFile.detectSNPsAndIndels(iMultifasta2SNPFile._wrapper)

    # Name tag -> ('type' value, 'length' value) used by the expected entries below.
    dTypeAndLengthByTag = {"SNP": ("SNP", 1), "INS": ("INSERTION", 2)}

    def buildExpSubSNP(tag, position, lineNumber, alleleNumber, flank5, flank3):
        # Assemble one expected sub-SNP record of this test.
        variantType, length = dTypeAndLengthByTag[tag]
        return {'subSNPName': "%s_%s_%d_line%d" % (batchName, tag, position, lineNumber),
                'position': position,
                'lineName': lineNumber,
                'allele': alleleNumber,
                '5flank': flank5,
                '3flank': flank3,
                'batchNumber': 1,
                'confidenceValue': "A",
                'type': variantType,
                'length': length}

    dExpAlleles = {'C': 1, 'G-': 2, 'TT': 3, '--': 4, 'A': 5}
    lExpSubSNPs = [buildExpSubSNP("SNP", 6, 3, 1, "ATTACCG", "A"),
                   buildExpSubSNP("SNP", 6, 1, 5, "A--ACCG", "A"),
                   buildExpSubSNP("SNP", 6, 2, 5, "AG-ACCG", "A"),
                   buildExpSubSNP("INS", 1, 2, 2, "A", "ACCGAA"),
                   buildExpSubSNP("INS", 1, 3, 3, "A", "ACCGCA"),
                   buildExpSubSNP("INS", 1, 1, 4, "A", "ACCGAA")]
    lExpIndividuals = [{'individualNumber': number, 'individualName': "line%d" % number, 'scientificName': "Arabidopsis thaliana"}
                       for number in (1, 2, 3)]

    self.assertEqual(dExpAlleles, iMultifasta2SNPFile._dAlleleFileResults)
    self.assertEqual(iMultifasta2SNPFile._sortSubSNPResultByBatchPositionAndLineName(lExpSubSNPs), iMultifasta2SNPFile._lSubSNPFileResults)
    self.assertEqual(lExpIndividuals, iMultifasta2SNPFile._lIndividualFileResults)
|
|
557
|
|
def test_detectSNPAndIndels_with_snps_inserts_and_dels(self):
    # Same insertion/SNP layout as the previous scenarios, with trailing gaps
    # in line2 and line3 so that DELETION entries are expected as well.
    batchName = "batch1"
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", batchName, "methyltransferase")

    iRefBioseq = Bioseq()
    iRefBioseq.header = "reference"
    iRefBioseq.sequence = "A--ACCGAATATAC"

    iLinesBioseqDB = BioseqDB()
    iLinesBioseqDB.setData([Bioseq("line1", "A--ACCGAATATAC"),
                            Bioseq("line2", "AG-ACCGAAT--AC"),
                            Bioseq("line3", "ATTACCGCA-----")])

    iMultifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(iRefBioseq, iLinesBioseqDB, iMultifasta2SNPFile._logFile, self._inFileName)
    iMultifasta2SNPFile._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"}
    iMultifasta2SNPFile.detectSNPsAndIndels(iMultifasta2SNPFile._wrapper)

    # Name tag -> ('type' value, 'length' value) used by the expected entries below.
    dTypeAndLengthByTag = {"SNP": ("SNP", 1), "INS": ("INSERTION", 2), "DEL": ("DELETION", 5)}

    def buildExpSubSNP(tag, position, lineNumber, alleleNumber, flank5, flank3):
        # Assemble one expected sub-SNP record of this test.
        variantType, length = dTypeAndLengthByTag[tag]
        return {'subSNPName': "%s_%s_%d_line%d" % (batchName, tag, position, lineNumber),
                'position': position,
                'lineName': lineNumber,
                'allele': alleleNumber,
                '5flank': flank5,
                '3flank': flank3,
                'batchNumber': 1,
                'confidenceValue': "A",
                'type': variantType,
                'length': length}

    dExpAlleles = {'C': 1, 'G-': 2, 'T--AC': 3, 'TT': 4, '-----': 5, '--': 6, 'TATAC': 7, 'A': 8}
    lExpSubSNPs = [buildExpSubSNP("SNP", 6, 3, 1, "ATTACCG", "A-----"),
                   buildExpSubSNP("SNP", 6, 1, 8, "A--ACCG", "ATATAC"),
                   buildExpSubSNP("SNP", 6, 2, 8, "AG-ACCG", "AT--AC"),

                   buildExpSubSNP("INS", 1, 2, 2, "A", "ACCGAAT--AC"),
                   buildExpSubSNP("INS", 1, 3, 4, "A", "ACCGCA-----"),
                   buildExpSubSNP("INS", 1, 1, 6, "A", "ACCGAATATAC"),

                   buildExpSubSNP("DEL", 8, 2, 3, "AG-ACCGAA", ""),
                   buildExpSubSNP("DEL", 8, 3, 5, "ATTACCGCA", ""),
                   buildExpSubSNP("DEL", 8, 1, 7, "A--ACCGAA", "")]
    lExpIndividuals = [{'individualNumber': number, 'individualName': "line%d" % number, 'scientificName': "Arabidopsis thaliana"}
                       for number in (1, 2, 3)]

    self.assertEqual(dExpAlleles, iMultifasta2SNPFile._dAlleleFileResults)
    self.assertEqual(iMultifasta2SNPFile._sortSubSNPResultByBatchPositionAndLineName(lExpSubSNPs), iMultifasta2SNPFile._lSubSNPFileResults)
    self.assertEqual(lExpIndividuals, iMultifasta2SNPFile._lIndividualFileResults)
|
|
597
|
|
def test_createWrapperFromFile_with_upcase_and_lowcase_nucleotide(self):
    # Compares the wrapper returned by createWrapperFromFile() with one built
    # by hand from upper-case sequences. The input fixture comes from
    # _writeInputFileWithUpcaseAndLowcaseNucleotide() (presumably mixed-case;
    # the helper is defined elsewhere in this class).
    self._writeInputFileWithUpcaseAndLowcaseNucleotide()
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")

    iExpRefBioseq = Bioseq("Sequence_de_Reference",
                           "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA")
    iExpLinesBioseqDB = BioseqDB()
    for header, sequence in (("Line1", "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA"),
                             ("Line2", "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATACGCAGTAGCCAAACCTCCACAATA")):
        iExpLinesBioseqDB.add(Bioseq(header, sequence))

    iExpWrapper = ReferenceBioseqAndLinesBioseqDBWrapper(iExpRefBioseq, iExpLinesBioseqDB, iMultifasta2SNPFile._logFile, self._inFileName)

    iObsWrapper = iMultifasta2SNPFile.createWrapperFromFile(self._inFileName)

    self.assertEqual(iObsWrapper._iReferenceBioseq, iExpWrapper._iReferenceBioseq)
    self.assertEqual(iObsWrapper._iLinesBioseqDB, iExpWrapper._iLinesBioseqDB)
|
|
619
|
|
def test_checkHeaderAlphabet(self):
    # header ALPHABET [^a-zA-Z0-9_-:]
    # Lower-case letters, upper-case letters, digits, '_', ':' and '-' must
    # all be accepted.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    for validHeader in ("abcdefghijklmnopqrstuvwxyz0912834567_:-",
                        "ABCDEFGHIJKLMNOPQRSTUVWXYZ0912834567_:-"):
        self.assertTrue(iMultifasta2SNPFile.checkHeaderAlphabet(validHeader))
|
|
630
|
|
def test_checkHeaderAlphabet_empty_string(self):
    # An empty header must be rejected.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    isValid = iMultifasta2SNPFile.checkHeaderAlphabet("")
    self.assertFalse(isValid)
|
|
638
|
|
def test_checkHeaderAlphabet_space(self):
    # A header made of a single blank must be rejected.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    isValid = iMultifasta2SNPFile.checkHeaderAlphabet(" ")
    self.assertFalse(isValid)
|
|
646
|
|
def test_checkHeaderAlphabet_non_aphabetical(self):
    # A character outside the header alphabet ('}') must be rejected.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    isValid = iMultifasta2SNPFile.checkHeaderAlphabet("}")
    self.assertFalse(isValid)
|
|
654
|
|
def test_isDNA_bases(self):
    # A sequence made only of A, C, G and T must be accepted.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    dnaSequence = "TGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCGACTAATCAACAATATAATGCGAGTAGAGCTTGA"
    isDNA = iMultifasta2SNPFile.isDNA_bases(dnaSequence)
    self.assertTrue(isDNA)
|
|
662
|
|
def test_isDNA_bases_non_DNA_letter(self):
    # A sequence starting with 'X' must be rejected.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    isDNA = iMultifasta2SNPFile.isDNA_bases("XTAGTTGATCA")
    self.assertFalse(isDNA)
|
|
670
|
|
def test_isDNA_bases_carriage_return(self):
    # A sequence with an embedded newline character ("\n") must be rejected.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    isDNA = iMultifasta2SNPFile.isDNA_bases("TA\nGTTGATCA")
    self.assertFalse(isDNA)
|
|
678
|
|
def test_isDNA_bases_empty_string(self):
    # An empty sequence must be rejected.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    isDNA = iMultifasta2SNPFile.isDNA_bases("")
    self.assertFalse(isDNA)
|
|
686
|
|
def test_isDNA_bases_space(self):
    # A sequence made of a single blank must be rejected.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    isDNA = iMultifasta2SNPFile.isDNA_bases(" ")
    self.assertFalse(isDNA)
|
|
694
|
|
def test_isDNA_bases_IUPAC_letter_but_non_DNA_bases(self):
    # The letters U, M, W, S and B must be rejected as DNA bases.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    isDNA = iMultifasta2SNPFile.isDNA_bases("UMWSB")
    self.assertFalse(isDNA)
|
|
702
|
|
def test_getLineAsAHeader(self):
    # Well-formed header line: the expected header has no leading '>' and
    # carries '_' where the line had blanks.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    obsHeader = iMultifasta2SNPFile.getLineAsAHeader(">test on good header")
    self.assertEqual(obsHeader, "test_on_good_header")
|
|
712
|
|
def test_getLineAsAHeader_warning_bad_header_tag_omitted(self):
    # Feeds getLineAsAHeader() a header line without its leading '>' tag.
    # NOTE(review): as written this test cannot fail. Nothing is asserted
    # when no exception is raised, and in the handler assertRaises() receives
    # the exception instance as its "callable" (calling it presumably raises
    # TypeError, which satisfies the check). Confirm whether an exception or
    # only a warning is the intended contract, then tighten the assertion.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    headerLineWithoutTag = "test on bad header with tag omitted"
    try:
        iMultifasta2SNPFile.getLineAsAHeader(headerLineWithoutTag)
    except Exception as err:
        self.assertRaises(Exception, err, self._inFileName, self._obsSubSNPFileName)
|
|
724
|
|
def test_getLineAsAHeader_warning_repeated_blanks_removed(self):
    # A blank followed by a tab inside the header is expected to come back as
    # a single '_' in the returned header.
    #
    # The former trailing check
    #     assertRaises(Exception, getLineAsAHeader(line), ...)
    # has been removed: it evaluated the call first and handed the returned
    # string to assertRaises() as the callable, so it only "passed" because a
    # str cannot be called. It also contradicted the equality check below,
    # which already shows that the call returns normally.
    lineToBeCheck = ">test on header \twith warning"
    expHeader = "test_on_header_with_warning"
    batchName = "batch1"
    taxon = "Arabidopsis thaliana"
    gene = "methyltransferase"
    multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene)

    obsHeader = multifasta2SNPFile.getLineAsAHeader(lineToBeCheck)

    self.assertEqual(obsHeader, expHeader)
|
|
736
|
|
def test_getLineAsAHeader_fatal_error_bad_header(self):
    # A header containing a backslash must make getLineAsAHeader() raise.
    #
    # assertRaises() receives the method and its argument so that the test
    # fails when no exception is raised; the former try/except form asserted
    # nothing in that case and could never fail.
    #
    # "\\o" is the same two characters the original "\o" produced (backslash
    # followed by 'o'), written with an explicit escape.
    lineToBeCheck = ">test\\on bad header with fatal error"

    batchName = "batch1"
    taxon = "Arabidopsis thaliana"
    gene = "methyltransferase"
    multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene)

    self.assertRaises(Exception, multifasta2SNPFile.getLineAsAHeader, lineToBeCheck)
|
|
748
|
|
def test_isHeaderInRefSeqList(self):
    # Calls isHeaderInRefSeqList() with a header that is present in
    # _lRefSequences.
    # NOTE(review): the returned value is never checked, and the except branch
    # cannot fail (assertRaises() is handed the exception instance as its
    # "callable"). Confirm the expected outcome and assert it explicitly.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    iMultifasta2SNPFile._lRefSequences = [Bioseq("line1", "A--ACCGAATATAC"),
                                          Bioseq("line2", "AG-ACCGAAT--AC"),
                                          Bioseq("line3", "ATTACCGCA-----")]
    try:
        iMultifasta2SNPFile.isHeaderInRefSeqList("line1")
    except Exception as err:
        self.assertRaises(Exception, err)
|
|
765
|
|
def test_completeAlleleSetWithCurrentAllele_one_allele_added(self):
    # Adding an allele absent from the set: it is expected under the next
    # free number (4).
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    dInputAlleles = {"A": 1, "T": 2, "G": 3}
    dExpAlleles = {"A": 1, "T": 2, "G": 3, "C": 4}

    dObsAlleles = iMultifasta2SNPFile._completeAlleleSetWithCurrentAllele(dInputAlleles, "C")

    self.assertEqual(dObsAlleles, dExpAlleles)
|
|
781
|
|
def test_completeAlleleSetWithCurrentAllele_no_allele_added(self):
    # Adding an allele already in the set: the set is expected unchanged.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    dInputAlleles = {"A": 1, "T": 2, "G": 3}
    dExpAlleles = {"A": 1, "T": 2, "G": 3}

    dObsAlleles = iMultifasta2SNPFile._completeAlleleSetWithCurrentAllele(dInputAlleles, "T")

    self.assertEqual(dObsAlleles, dExpAlleles)
|
|
796
|
|
def test_completeAlleleSetWithCurrentAllele_with_an_empty_allele_set(self):
    # Adding an allele to an empty set: it is expected under number 1.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    dInputAlleles = {}
    dExpAlleles = {"T": 1}

    dObsAlleles = iMultifasta2SNPFile._completeAlleleSetWithCurrentAllele(dInputAlleles, "T")

    self.assertEqual(dObsAlleles, dExpAlleles)
|
|
807
|
|
def test_completeBatchLineListWithCurrentIndividual(self):
    # Adding "Individual3" to a batch-line list that already holds
    # individuals 1 and 2: a third entry is expected for the same batch.
    #TODO: this test only pass with a batchNumber of 1
    iCurrentBatchNumber = 1
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")

    lInputBatchLines = [{'IndividualNumber': str(number), 'BatchNumber': iCurrentBatchNumber}
                        for number in (1, 2)]
    lIndividuals = [{'individualNumber': number, 'individualName': "Individual%d" % number, 'scientificName': "Arabidopsis thaliana"}
                    for number in (1, 2, 3)]
    lExpBatchLines = [{'IndividualNumber': str(number), 'BatchNumber': iCurrentBatchNumber}
                      for number in (1, 2, 3)]

    lObsBatchLines = iMultifasta2SNPFile._completeBatchLineListWithCurrentIndividual(lInputBatchLines, lIndividuals, "Individual3")

    self.assertEqual(lObsBatchLines, lExpBatchLines)
|
|
826
|
|
def test_completeBatchLineListWithCurrentIndividual_no_entries_in_batchline_results_in_input(self):
    # Adding "Individual2" to an empty batch-line list: a single entry for
    # individual "2" in batch 1 is expected.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")

    lIndividuals = [{'individualNumber': number, 'individualName': "Individual%d" % number, 'scientificName': "Arabidopsis thaliana"}
                    for number in (1, 2, 3)]
    lExpBatchLines = [{'IndividualNumber': "2", 'BatchNumber': 1}]

    lObsBatchLines = iMultifasta2SNPFile._completeBatchLineListWithCurrentIndividual([], lIndividuals, "Individual2")

    self.assertEqual(lObsBatchLines, lExpBatchLines)
|
|
840
|
|
def test_completeBatchLineListWithCurrentIndividual_no_individual_in_individualList(self):
    # Completing the batch-line list from an empty individual list must raise.
    #
    # assertRaises() receives the method and its arguments so that the test
    # fails when no exception is raised; the former try/except form asserted
    # nothing in that case and could never fail.
    lBatchLineResults = [{'IndividualNumber': "1", 'BatchNumber': 1},
                         {'IndividualNumber': "2", 'BatchNumber': 1}]
    lIndividualResults = []

    lineName2Add = "Individual3"
    batchName = "batch1"
    taxon = "Arabidopsis thaliana"
    gene = "methyltransferase"
    multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene)

    self.assertRaises(Exception,
                      multifasta2SNPFile._completeBatchLineListWithCurrentIndividual,
                      lBatchLineResults, lIndividualResults, lineName2Add)
|
|
855
|
|
def test_completeBatchLineListWithCurrentIndividual_individual_added_has_no_individual_number(self):
    # The individual to add ("Individual3") has no 'individualNumber' key in
    # the individual list: completing the batch-line list must raise.
    #
    # assertRaises() receives the method and its arguments so that the test
    # fails when no exception is raised; the former try/except form asserted
    # nothing in that case and could never fail.
    lBatchLineResults = [{'IndividualNumber': "1", 'BatchNumber': "1"},
                         {'IndividualNumber': "2", 'BatchNumber': "1"}]
    lIndividualResults = [{'individualNumber': 1, 'individualName': "Individual1", 'scientificName': "Arabidopsis thaliana"},
                          {'individualNumber': 2, 'individualName': "Individual2", 'scientificName': "Arabidopsis thaliana"},
                          {'individualName': "Individual3", 'scientificName': "Arabidopsis thaliana"}]

    lineName2Add = "Individual3"
    batchName = "batch1"
    taxon = "Arabidopsis thaliana"
    gene = "methyltransferase"
    multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene)

    self.assertRaises(Exception,
                      multifasta2SNPFile._completeBatchLineListWithCurrentIndividual,
                      lBatchLineResults, lIndividualResults, lineName2Add)
|
|
872
|
|
def test_completeBatchLineListWithCurrentIndividual_individual_not_present_in_individualList(self):
    # The line name to add does not match any individual of the list:
    # completing the batch-line list must raise.
    #
    # assertRaises() receives the method and its arguments so that the test
    # fails when no exception is raised; the former try/except form asserted
    # nothing in that case and could never fail.
    lBatchLineResults = [{'IndividualNumber': "1", 'BatchNumber': "1"},
                         {'IndividualNumber': "2", 'BatchNumber': "1"}]
    lIndividualResults = [{'individualNumber': 1, 'individualName': "Individual1", 'scientificName': "Arabidopsis thaliana"},
                          {'individualNumber': 2, 'individualName': "Individual2", 'scientificName': "Arabidopsis thaliana"},
                          {'individualNumber': 3, 'individualName': "Individual3", 'scientificName': "Arabidopsis thaliana"}]

    lineName2Add = "Michael Corleone"
    batchName = "batch1"
    taxon = "Arabidopsis thaliana"
    gene = "methyltransferase"
    multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene)

    self.assertRaises(Exception,
                      multifasta2SNPFile._completeBatchLineListWithCurrentIndividual,
                      lBatchLineResults, lIndividualResults, lineName2Add)
|
|
889
|
|
def test_findASubSNPInAListWithHisName(self):
    # Looking up the second entry by name: that entry and its index (1) are
    # expected back.
    iMultifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    searchedName = "SubSNP_batch1_2_line1"
    lSubSNPs = [
        {'subSNPName': "SubSNP_batch1_1_line2", 'position': 1, 'lineName': 2, 'allele': 2,
         'batchNumber': 1, 'confidenceValue': "A", 'type': "DELETION"},
        {'subSNPName': "SubSNP_batch1_2_line1", 'position': 1, 'lineName': 1, 'allele': 1,
         'batchNumber': 1, 'confidenceValue': "A", 'type': "DELETION"},
        {'subSNPName': "SubSNP_batch1_6_line1", 'position': 6, 'lineName': 1, 'allele': 3,
         'batchNumber': 1, 'confidenceValue': "A", 'type': "SNP"},
    ]
    dExpSubSNP = {'subSNPName': "SubSNP_batch1_2_line1", 'position': 1, 'lineName': 1, 'allele': 1,
                  'batchNumber': 1, 'confidenceValue': "A", 'type': "DELETION"}
    expIndex = 1

    dObsSubSNP, obsIndex = iMultifasta2SNPFile.findASubSNPInAListWithHisName(searchedName, lSubSNPs)

    self.assertEqual(expIndex, obsIndex)
    self.assertEqual(dExpSubSNP, dObsSubSNP)
|
|
905
|
|
def test_findASubSNPInAListWithHisName_SubSNP_not_found(self):
    # Looking up a name that is not in the list must raise.
    #
    # assertRaises() receives the method and its arguments so that the test
    # fails when no exception is raised; the former try/except form asserted
    # nothing in that case and could never fail.
    lSubSNPList = [{'subSNPName': "SubSNP_batch1_1_line2", 'position': 1, 'lineName': 2, 'allele': 2, 'batchNumber': 1, 'confidenceValue': "A", 'type': "DELETION"},
                   {'subSNPName': "SubSNP_batch1_2_line1", 'position': 1, 'lineName': 1, 'allele': 1, 'batchNumber': 1, 'confidenceValue': "A", 'type': "DELETION"},
                   {'subSNPName': "SubSNP_batch1_6_line1", 'position': 6, 'lineName': 1, 'allele': 3, 'batchNumber': 1, 'confidenceValue': "A", 'type': "SNP"}]
    name = "SubSNP_fake"

    multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")

    self.assertRaises(Exception, multifasta2SNPFile.findASubSNPInAListWithHisName, name, lSubSNPList)
|
|
918
|
|
def test_clusteriseIndels(self):
    # indel1 (1-6) and indel3 (5-10) overlap: both are expected with bounds
    # 1-10 while indel2 (12-15) keeps its own.
    iMultifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    lInputIndels = [{'name': name, 'start': start, 'end': end}
                    for name, start, end in (("indel1", 1, 6), ("indel2", 12, 15), ("indel3", 5, 10))]
    dQueryIndel = {'start': 1, 'end': 6}

    lObsIndels = iMultifasta2SNPFile.clusteriseIndels(dQueryIndel, lInputIndels)

    lExpIndels = [{'name': name, 'start': start, 'end': end}
                  for name, start, end in (("indel1", 1, 10), ("indel2", 12, 15), ("indel3", 1, 10))]
    self.assertEqual(lExpIndels, lObsIndels)
|
|
932
|
|
def test_clusteriseIndels_no_overlap(self):
    # None of the three indels overlap: the list is expected unchanged.
    iMultifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    lInputIndels = [{'name': name, 'start': start, 'end': end}
                    for name, start, end in (("indel1", 1, 6), ("indel2", 12, 15), ("indel3", 25, 30))]
    dQueryIndel = {'start': 1, 'end': 6}

    lObsIndels = iMultifasta2SNPFile.clusteriseIndels(dQueryIndel, lInputIndels)

    lExpIndels = [{'name': name, 'start': start, 'end': end}
                  for name, start, end in (("indel1", 1, 6), ("indel2", 12, 15), ("indel3", 25, 30))]
    self.assertEqual(lExpIndels, lObsIndels)
|
|
946
|
|
def test_clusteriseIndels_many_overlaps_complicated(self):
    # The four indels overlap one another directly or through a neighbour:
    # all of them are expected to share the bounds 1-40.
    iMultifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    lInputIndels = [{'name': name, 'start': start, 'end': end}
                    for name, start, end in (("indel1", 1, 6), ("indel2", 12, 15), ("indel3", 5, 10), ("indel4", 9, 40))]
    dQueryIndel = {'start': 5, 'end': 10}

    lObsIndels = iMultifasta2SNPFile.clusteriseIndels(dQueryIndel, lInputIndels)

    lExpIndels = [{'name': name, 'start': 1, 'end': 40}
                  for name in ("indel1", "indel2", "indel3", "indel4")]
    self.assertEqual(lExpIndels, lObsIndels)
|
|
962
|
|
def test_updateBoundsForAnIndelInAnIndelList(self):
    # Updating indel2 with bounds 7-19: only that entry is expected to change.
    iMultifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    lInputIndels = [{'name': name, 'start': start, 'end': end}
                    for name, start, end in (("indel1", 1, 6), ("indel2", 12, 15), ("indel3", 5, 10), ("indel4", 9, 40))]
    dUpdatedIndel = {'name': "indel2", 'start': 7, 'end': 19}

    lObsIndels = iMultifasta2SNPFile.updateBoundsForAnIndelInAnIndelList(lInputIndels, dUpdatedIndel)

    lExpIndels = [{'name': name, 'start': start, 'end': end}
                  for name, start, end in (("indel1", 1, 6), ("indel2", 7, 19), ("indel3", 5, 10), ("indel4", 9, 40))]
    self.assertEqual(lExpIndels, lObsIndels)
|
|
976
|
|
def test_updateBoundsForAnIndelInAnIndelList_no_update_to_do(self):
    # The "new" bounds of indel2 equal its current ones: the list is expected
    # unchanged.
    iMultifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    lInputIndels = [{'name': name, 'start': start, 'end': end}
                    for name, start, end in (("indel1", 1, 6), ("indel2", 12, 15), ("indel3", 5, 10), ("indel4", 9, 40))]
    dUpdatedIndel = {'name': "indel2", 'start': 12, 'end': 15}

    lObsIndels = iMultifasta2SNPFile.updateBoundsForAnIndelInAnIndelList(lInputIndels, dUpdatedIndel)

    lExpIndels = [{'name': name, 'start': start, 'end': end}
                  for name, start, end in (("indel1", 1, 6), ("indel2", 12, 15), ("indel3", 5, 10), ("indel4", 9, 40))]
    self.assertEqual(lExpIndels, lObsIndels)
|
|
990
|
|
def test_updateBoundsForAnIndelInAnIndelList_indel_2_update_does_not_exist(self):
    # Updating an indel whose name is not in the list must raise.
    #
    # assertRaises() receives the method and its arguments so that the test
    # fails when no exception is raised; the former try/except form asserted
    # nothing in that case and could never fail.
    lIndelsList = [{'name': "indel1", 'start': 1, 'end': 6},
                   {'name': "indel2", 'start': 12, 'end': 15},
                   {'name': "indel3", 'start': 5, 'end': 10},
                   {'name': "indel4", 'start': 9, 'end': 40}]
    dIndelWithNewBounds = {'name': "DeNiro", 'start': 12, 'end': 15}
    multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")

    self.assertRaises(Exception,
                      multifasta2SNPFile.updateBoundsForAnIndelInAnIndelList,
                      lIndelsList, dIndelWithNewBounds)
|
|
1002
|
|
def test_mergeBoundsFor2Indels(self):
    # Indels 1-4 and 2-15 overlap: both are expected back with bounds 1-15.
    iMultifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    dFirstIndel = {'start': 1, 'end': 4}
    dSecondIndel = {'start': 2, 'end': 15}

    dObsFirst, dObsSecond = iMultifasta2SNPFile.mergeBoundsForTwoOverlappingIndels(dFirstIndel, dSecondIndel)

    dExpMerged = {'start': 1, 'end': 15}
    self.assertEqual(dExpMerged, dObsFirst)
    self.assertEqual({'start': 1, 'end': 15}, dObsSecond)
|
|
1012
|
|
def test_mergeBoundsFor2Indels_no_overlap(self):
    # Indels 1-4 and 5-15 do not overlap: both are expected back unchanged.
    iMultifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    dFirstIndel = {'start': 1, 'end': 4}
    dSecondIndel = {'start': 5, 'end': 15}

    dObsFirst, dObsSecond = iMultifasta2SNPFile.mergeBoundsForTwoOverlappingIndels(dFirstIndel, dSecondIndel)

    self.assertEqual({'start': 1, 'end': 4}, dObsFirst)
    self.assertEqual({'start': 5, 'end': 15}, dObsSecond)
|
|
1022
|
|
def test_getUngappedPositionInRefSeq(self):
    # With the gapped reference "A--TTACC-GAA", aligned positions 1, 5 and 10
    # are expected to map to ungapped positions 1, 3 and 7.
    iMultifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")

    iRefBioseq = Bioseq()
    iRefBioseq.header = "reference"
    iRefBioseq.sequence = "A--TTACC-GAA"

    iLinesBioseqDB = BioseqDB()
    iLinesBioseqDB.setData([Bioseq("line1", "AACTTTCCAGAA"),
                            Bioseq("line2", "AACTTACC-GAA")])

    iMultifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(iRefBioseq, iLinesBioseqDB, iMultifasta2SNPFile._logFile, self._inFileName)

    lExpUngappedPositions = [1, 3, 7]
    # All lookups are done first, then compared one by one.
    lObsUngappedPositions = [iMultifasta2SNPFile.getUngappedPositionInRefSeq(alignedPosition)
                             for alignedPosition in (1, 5, 10)]

    for expPosition, obsPosition in zip(lExpUngappedPositions, lObsUngappedPositions):
        self.assertEqual(expPosition, obsPosition)
|
|
1046
|
|
def test_getUngappedPositionInRefSeq_no_gap(self):
    # The reference has no gap: positions 1, 5 and 10 are expected to map to
    # themselves.
    iMultifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")

    iRefBioseq = Bioseq()
    iRefBioseq.header = "reference"
    iRefBioseq.sequence = "AACTTACCAGAA"

    iLinesBioseqDB = BioseqDB()
    iLinesBioseqDB.setData([Bioseq("line1", "AACTTTCCAGAA"),
                            Bioseq("line2", "AACTTACC-GAA")])

    iMultifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(iRefBioseq, iLinesBioseqDB, iMultifasta2SNPFile._logFile, self._inFileName)

    lExpUngappedPositions = [1, 5, 10]
    # All lookups are done first, then compared one by one.
    lObsUngappedPositions = [iMultifasta2SNPFile.getUngappedPositionInRefSeq(alignedPosition)
                             for alignedPosition in (1, 5, 10)]

    for expPosition, obsPosition in zip(lExpUngappedPositions, lObsUngappedPositions):
        self.assertEqual(expPosition, obsPosition)
|
|
1070
|
|
def test_checkAllSeq_sequences_with_different_sizes_one_seq_longer(self):
    # line2 is longer than the reference: building the wrapper must raise an
    # exception whose message names the file, the offending sequence and both
    # lengths.
    #
    # The former version only compared the message inside the except branch
    # (after a vacuous assertRaises(Exception, e)), so nothing was checked
    # when no exception was raised. The else branch below makes that case an
    # explicit failure.
    multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    refBioseq = Bioseq()
    alignedBioseqDB = BioseqDB()
    refBioseq.sequence = "AACTTACCAGAA"
    refBioseq.header = "reference"
    bs1 = Bioseq("line1", "AACTTTCCAGAA")
    bs2 = Bioseq("line2", "AACTTACC-GAATTTC")

    alignedBioseqDB.setData([bs1, bs2])

    try:
        multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName)
    except Exception as e:
        obsMsg = e.message
    else:
        self.fail("no exception raised although line2 is longer than the reference sequence")

    expMsg = "File: " + self._inFileName + ", problem with the sequence " + bs2.header + ": its length is different from the reference seq! All the sequences must have the same length.\n"
    expMsg += "refseq length: " + str(len(refBioseq.sequence)) + "\n"
    expMsg += "seq length: " + str(len(bs2.sequence)) + "\n"
    self.assertEqual(expMsg, obsMsg)
|
|
1091
|
|
def test_checkAllSeq_sequences_with_different_sizes_one_seq_shorter(self):
    """Building the wrapper must fail when a line sequence is shorter than the reference.

    The expected error message names the input file, the offending
    sequence header and the two lengths.  The test fails explicitly when
    no exception is raised at all.
    """
    multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    refBioseq = Bioseq()
    alignedBioseqDB = BioseqDB()
    refBioseq.sequence = "AACTTACCAGAA"
    refBioseq.header = "reference"
    bs1 = Bioseq("line1", "AACTTTCCAGAA")
    bs2 = Bioseq("line2", "AACTTACC")

    alignedBioseqDB.setData([bs1, bs2])

    try:
        multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName)
    except Exception as e:
        # 'message' is not available on every exception type / Python version;
        # fall back to the string form of the exception when it is missing.
        obsMsg = getattr(e, "message", str(e))
        expMsg = "File: " + self._inFileName + ", problem with the sequence " + bs2.header + ": its length is different from the reference seq! All the sequences must have the same length.\n"
        expMsg += "refseq length: " + str(len(refBioseq.sequence)) + "\n"
        expMsg += "seq length: " + str(len(bs2.sequence)) + "\n"
        self.assertEqual(expMsg, obsMsg)
    else:
        # Previously the test passed silently when nothing was raised.
        self.fail("No exception raised although line2 is shorter than the reference sequence")
|
|
1112
|
|
1113
|
|
def test_getFlanksOfASubSNP(self):
    """Check the 5' and 3' flanks returned for a polymorphism of length 3 at position 3 of line1."""
    reference = Bioseq()
    linesDB = BioseqDB()
    reference.sequence = "AACTTACCAGAA"
    reference.header = "reference"
    firstLine = Bioseq("line1", "AACTTTCCAGAA")
    secondLine = Bioseq("line2", "AACTTACC-GAA")
    linesDB.setData([firstLine, secondLine])
    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, linesDB, converter._logFile, self._inFileName)

    position = 3
    length = 3
    # NOTE(review): the last argument is presumably the maximum flank size - confirm.
    fivePrime, threePrime = converter.getFlanksOfASubSNP("line1", position, length, 7)

    self.assertEqual("AA", fivePrime)
    self.assertEqual("TCCAGAA", threePrime)
|
|
1133
|
|
def test_getFlanksOfASubSNP_flank_truncated(self):
    """Check the flanks when the last argument (500) exceeds the sequence length.

    The expected flanks are the same as when 7 is passed instead of 500.
    """
    reference = Bioseq()
    linesDB = BioseqDB()
    reference.sequence = "AACTTACCAGAA"
    reference.header = "reference"
    firstLine = Bioseq("line1", "AACTTTCCAGAA")
    secondLine = Bioseq("line2", "AACTTACC-GAA")
    linesDB.setData([firstLine, secondLine])
    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, linesDB, converter._logFile, self._inFileName)

    position = 3
    length = 3
    # NOTE(review): the last argument is presumably the maximum flank size - confirm.
    fivePrime, threePrime = converter.getFlanksOfASubSNP("line1", position, length, 500)

    self.assertEqual("AA", fivePrime)
    self.assertEqual("TCCAGAA", threePrime)
|
|
1153
|
|
def test_getFlanksOfASubSNP_empty_seq(self):
    """Check that both flanks are empty strings when every sequence is empty."""
    reference = Bioseq()
    linesDB = BioseqDB()
    reference.sequence = ""
    reference.header = "reference"
    firstLine = Bioseq("line1", "")
    secondLine = Bioseq("line2", "")
    linesDB.setData([firstLine, secondLine])
    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, linesDB, converter._logFile, self._inFileName)

    position = 3
    length = 3
    fivePrime, threePrime = converter.getFlanksOfASubSNP("line1", position, length, 500)

    self.assertEqual("", fivePrime)
    self.assertEqual("", threePrime)
|
|
1173
|
|
def test_getFlanksOfASubSNP_flank_of_first_base(self):
    """Check the flanks of a polymorphism of length 1 located on the first base of line1.

    The 5' flank is expected to be empty and the 3' flank to be the rest
    of the line1 sequence.
    """
    reference = Bioseq()
    linesDB = BioseqDB()
    reference.sequence = "AACTTACCAGAA"
    reference.header = "reference"
    firstLine = Bioseq("line1", "AACTTTCCAGAA")
    secondLine = Bioseq("line2", "AACTTACC-GAA")
    linesDB.setData([firstLine, secondLine])
    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, linesDB, converter._logFile, self._inFileName)

    position = 1
    length = 1
    fivePrime, threePrime = converter.getFlanksOfASubSNP("line1", position, length, 500)

    self.assertEqual("", fivePrime)
    self.assertEqual("ACTTTCCAGAA", threePrime)
|
|
1193
|
|
def test_getFlanksOfASubSNP_flank_of_first_base_with_polym_on_all_sequence(self):
    """Check that both flanks are empty when the polymorphism starts at position 1 with length 12.

    The line1 sequence used here is 12 characters long, so the
    polymorphism covers it entirely.
    """
    reference = Bioseq()
    linesDB = BioseqDB()
    reference.sequence = "AACTTACCAGAA"
    reference.header = "reference"
    firstLine = Bioseq("line1", "AACTTTCCAGAA")
    secondLine = Bioseq("line2", "AACTTACC-GAA")
    linesDB.setData([firstLine, secondLine])
    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, linesDB, converter._logFile, self._inFileName)

    position = 1
    length = 12
    fivePrime, threePrime = converter.getFlanksOfASubSNP("line1", position, length, 500)

    self.assertEqual("", fivePrime)
    self.assertEqual("", threePrime)
|
|
1212
|
|
def test_getFlanksOfASubSNP_flank_of_last_base_with_polym_on_all_sequence(self):
    """Check the flanks of a polymorphism of length 1 located on the last base (position 12) of line1.

    The 5' flank is expected to be the first 11 bases of line1 and the
    3' flank to be empty.
    """
    reference = Bioseq()
    linesDB = BioseqDB()
    reference.sequence = "AACTTACCAGAA"
    reference.header = "reference"
    firstLine = Bioseq("line1", "AACTTTCCAGAA")
    secondLine = Bioseq("line2", "AACTTACC-GAA")
    linesDB.setData([firstLine, secondLine])
    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, linesDB, converter._logFile, self._inFileName)

    position = 12
    length = 1
    fivePrime, threePrime = converter.getFlanksOfASubSNP("line1", position, length, 500)

    self.assertEqual("AACTTTCCAGA", fivePrime)
    self.assertEqual("", threePrime)
|
|
1231 #
|
|
def test_subSNPExistsInSubSNPList_subSNP_exists(self):
    """subSNPExistsInSubSNPList must return True for a sub-SNP equal to one of the list entries."""
    batchName = "batch1"

    def buildSubSNP(nameSuffix, position, lineNumber, allele, flank5, flank3, polymType, length):
        # Build one sub-SNP dictionary; batchNumber and confidenceValue are the same for every entry.
        return {'subSNPName': batchName + nameSuffix, 'position': position, 'lineName': lineNumber, 'allele': allele,
                '5flank': flank5, '3flank': flank3, 'batchNumber': 1, 'confidenceValue': "A",
                'type': polymType, 'length': length}

    lSubSNP = [buildSubSNP("_DEL_1_line2", 1, 2, 3, "", "CCGAA", "DELETION", 4),
               buildSubSNP("_DEL_1_line1", 1, 1, 2, "", "CCGAA", "DELETION", 4),
               buildSubSNP("_SNP_8_line3", 8, 3, 1, "ATTACCG", "A", "SNP", 1),
               buildSubSNP("_SNP_8_line1", 8, 1, 6, "A--ACCG", "A", "SNP", 1),
               buildSubSNP("_SNP_8_line2", 8, 2, 6, "---ACCG", "A", "SNP", 1),
               buildSubSNP("_SNP_8_line4", 8, 4, 6, "----CCG", "A", "SNP", 1),
               buildSubSNP("_DEL_1_line4", 1, 4, 4, "", "CCGAA", "DELETION", 4),
               buildSubSNP("_DEL_1_line3", 1, 3, 5, "", "CCGGA", "DELETION", 4)]
    multifasta2SNPFile = Multifasta2SNPFile(batchName, "gene1", "mouse")

    # Same content as the second entry of lSubSNP.
    dSearchedSubSNP = buildSubSNP("_DEL_1_line1", 1, 1, 2, "", "CCGAA", "DELETION", 4)

    # 'exp' is the expected constant and 'obs' the value returned by the code
    # under test, as in the other tests of this class (the names were swapped).
    expResult = True
    obsResult = multifasta2SNPFile.subSNPExistsInSubSNPList(dSearchedSubSNP, lSubSNP)

    self.assertEqual(expResult, obsResult)
|
|
1250
|
|
def test_subSNPExistsInSubSNPList_subSNP_does_not_exist(self):
    """subSNPExistsInSubSNPList must return False for a sub-SNP that matches no list entry."""
    batchName = "batch1"

    def buildSubSNP(nameSuffix, position, lineNumber, allele, flank5, flank3, polymType, length):
        # Build one sub-SNP dictionary; batchNumber and confidenceValue are the same for every entry.
        return {'subSNPName': batchName + nameSuffix, 'position': position, 'lineName': lineNumber, 'allele': allele,
                '5flank': flank5, '3flank': flank3, 'batchNumber': 1, 'confidenceValue': "A",
                'type': polymType, 'length': length}

    lSubSNP = [buildSubSNP("_DEL_1_line2", 1, 2, 3, "", "CCGAA", "DELETION", 4),
               buildSubSNP("_DEL_1_line1", 1, 1, 2, "", "CCGAA", "DELETION", 4),
               buildSubSNP("_SNP_8_line3", 8, 3, 1, "ATTACCG", "A", "SNP", 1),
               buildSubSNP("_SNP_8_line1", 8, 1, 6, "A--ACCG", "A", "SNP", 1),
               buildSubSNP("_SNP_8_line2", 8, 2, 6, "---ACCG", "A", "SNP", 1),
               buildSubSNP("_SNP_8_line4", 8, 4, 6, "----CCG", "A", "SNP", 1),
               buildSubSNP("_DEL_1_line4", 1, 4, 4, "", "CCGAA", "DELETION", 4),
               buildSubSNP("_DEL_1_line3", 1, 3, 5, "", "CCGGA", "DELETION", 4)]
    multifasta2SNPFile = Multifasta2SNPFile(batchName, "gene1", "mouse")

    # Name and position (12) differ from every entry of lSubSNP.
    dSearchedSubSNP = buildSubSNP("_DEL_12_line1", 12, 1, 2, "", "CCGAA", "DELETION", 4)

    # 'exp' is the expected constant and 'obs' the value returned by the code
    # under test, as in the other tests of this class (the names were swapped).
    expResult = False
    obsResult = multifasta2SNPFile.subSNPExistsInSubSNPList(dSearchedSubSNP, lSubSNP)

    self.assertEqual(expResult, obsResult)
|
|
1269
|
|
def _writeExpSubSNPFile(self):
    """Write the expected SubSNP CSV file (header + 10 SNP rows for Batch1) to self._expSubSNPFileName.

    The file is opened through a context manager so that the handle is
    closed even when a write fails.
    """
    with open(self._expSubSNPFileName, "w") as expFileHandle:
        expFileHandle.write("SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n")
        expFileHandle.write("Batch1_SNP_4_Line1;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;1\n")
        expFileHandle.write("Batch1_SNP_4_Line2;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n")
        expFileHandle.write("Batch1_SNP_21_Line1;A;SNP;21;CCTTAGCCATTGCTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;2\n")
        expFileHandle.write("Batch1_SNP_21_Line2;A;SNP;21;CCTAAGCCATTGCTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n")
        expFileHandle.write("Batch1_SNP_25_Line1;A;SNP;25;CCTTAGCCATTGCTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;3\n")
        expFileHandle.write("Batch1_SNP_25_Line2;A;SNP;25;CCTAAGCCATTGCTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n")
        expFileHandle.write("Batch1_SNP_36_Line1;A;SNP;36;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;1;1;Sequence;;;3\n")
        expFileHandle.write("Batch1_SNP_36_Line2;A;SNP;36;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;1;2;Sequence;;;2\n")
        expFileHandle.write("Batch1_SNP_51_Line1;A;SNP;51;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;1;1;Sequence;;;2\n")
        expFileHandle.write("Batch1_SNP_51_Line2;A;SNP;51;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;1;2;Sequence;;;4\n")
|
|
1284
|
|
def _writeExpSubSNPFileWithSnpsAndIndels(self):
    """Write the expected SubSNP CSV file mixing INSERTION, DELETION and SNP rows to self._expSubSNPFileName.

    The file is opened through a context manager so that the handle is
    closed even when a write fails.
    """
    with open(self._expSubSNPFileName, "w") as expFileHandle:
        expFileHandle.write("SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n")
        expFileHandle.write("Batch1_INS_1_Line1;A;INSERTION;1;C;TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;2;1;1;Sequence;;;8\n")
        expFileHandle.write("Batch1_INS_1_Line2;A;INSERTION;1;C;AAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;2;1;2;Sequence;;;6\n")
        expFileHandle.write("Batch1_SNP_2_Line1;A;SNP;2;C--;AGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;1\n")
        expFileHandle.write("Batch1_SNP_2_Line2;A;SNP;2;CCT;AGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n")
        expFileHandle.write("Batch1_DEL_8_Line1;A;DELETION;8;C--TAGCCA;CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;3;1;1;Sequence;;;5\n")
        expFileHandle.write("Batch1_DEL_8_Line2;A;DELETION;8;CCTAAGCCA;CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;3;1;2;Sequence;;;7\n")
        expFileHandle.write("Batch1_SNP_19_Line1;A;SNP;19;C--TAGCCA---CTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;2\n")
        expFileHandle.write("Batch1_SNP_19_Line2;A;SNP;19;CCTAAGCCATT-CTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n")
        expFileHandle.write("Batch1_SNP_23_Line1;A;SNP;23;C--TAGCCA---CTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;3\n")
        expFileHandle.write("Batch1_SNP_23_Line2;A;SNP;23;CCTAAGCCATT-CTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n")
        expFileHandle.write("Batch1_SNP_34_Line1;A;SNP;34;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;1;1;Sequence;;;3\n")
        expFileHandle.write("Batch1_SNP_34_Line2;A;SNP;34;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;1;2;Sequence;;;2\n")
        expFileHandle.write("Batch1_SNP_49_Line1;A;SNP;49;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;1;1;Sequence;;;2\n")
        expFileHandle.write("Batch1_SNP_49_Line2;A;SNP;49;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;1;2;Sequence;;;4\n")
|
|
1303
|
|
def _writeExpSubSNPFileSeveralBatches(self):
    """Write the expected SubSNP CSV file for two batches (Batch_Gene1, Batch_Gene2) to self._expSubSNPFileName.

    Both batches reference Line1 and Line2.  The file is opened through a
    context manager so that the handle is closed even when a write fails.
    """
    with open(self._expSubSNPFileName, "w") as expFileHandle:
        expFileHandle.write("SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n")
        # Rows of the first batch
        expFileHandle.write("Batch_Gene1_SNP_4_Line1;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;1\n")
        expFileHandle.write("Batch_Gene1_SNP_4_Line2;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n")
        expFileHandle.write("Batch_Gene1_SNP_21_Line1;A;SNP;21;CCTTAGCCATTGCTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;2\n")
        expFileHandle.write("Batch_Gene1_SNP_21_Line2;A;SNP;21;CCTAAGCCATTGCTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n")
        expFileHandle.write("Batch_Gene1_SNP_25_Line1;A;SNP;25;CCTTAGCCATTGCTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;3\n")
        expFileHandle.write("Batch_Gene1_SNP_25_Line2;A;SNP;25;CCTAAGCCATTGCTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n")
        expFileHandle.write("Batch_Gene1_SNP_36_Line1;A;SNP;36;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;1;1;Sequence;;;3\n")
        expFileHandle.write("Batch_Gene1_SNP_36_Line2;A;SNP;36;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;1;2;Sequence;;;2\n")
        expFileHandle.write("Batch_Gene1_SNP_51_Line1;A;SNP;51;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;1;1;Sequence;;;2\n")
        expFileHandle.write("Batch_Gene1_SNP_51_Line2;A;SNP;51;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;1;2;Sequence;;;4\n")

        # Rows of the second batch
        expFileHandle.write("Batch_Gene2_INS_1_Line1;A;INSERTION;1;C;TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;2;2;1;Sequence;;;8\n")
        expFileHandle.write("Batch_Gene2_INS_1_Line2;A;INSERTION;1;C;AAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;2;2;2;Sequence;;;6\n")
        expFileHandle.write("Batch_Gene2_SNP_2_Line1;A;SNP;2;C--;AGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;2;1;Sequence;;;1\n")
        expFileHandle.write("Batch_Gene2_SNP_2_Line2;A;SNP;2;CCT;AGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;2;2;Sequence;;;4\n")
        expFileHandle.write("Batch_Gene2_DEL_8_Line1;A;DELETION;8;C--TAGCCA;CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;3;2;1;Sequence;;;5\n")
        expFileHandle.write("Batch_Gene2_DEL_8_Line2;A;DELETION;8;CCTAAGCCA;CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;3;2;2;Sequence;;;7\n")
        expFileHandle.write("Batch_Gene2_SNP_19_Line1;A;SNP;19;C--TAGCCA---CTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;2;1;Sequence;;;2\n")
        expFileHandle.write("Batch_Gene2_SNP_19_Line2;A;SNP;19;CCTAAGCCATT-CTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;2;2;Sequence;;;2\n")
        expFileHandle.write("Batch_Gene2_SNP_23_Line1;A;SNP;23;C--TAGCCA---CTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;2;1;Sequence;;;3\n")
        expFileHandle.write("Batch_Gene2_SNP_23_Line2;A;SNP;23;CCTAAGCCATT-CTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;2;2;Sequence;;;2\n")
        expFileHandle.write("Batch_Gene2_SNP_34_Line1;A;SNP;34;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;2;1;Sequence;;;3\n")
        expFileHandle.write("Batch_Gene2_SNP_34_Line2;A;SNP;34;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;2;2;Sequence;;;2\n")
        expFileHandle.write("Batch_Gene2_SNP_49_Line1;A;SNP;49;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;2;1;Sequence;;;2\n")
        expFileHandle.write("Batch_Gene2_SNP_49_Line2;A;SNP;49;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;2;2;Sequence;;;4\n")
|
|
1333
|
|
def _writeExpSubSNPFileSeveralBatches_different_lines_between_files(self):
    """Write the expected SubSNP CSV file for two batches using different lines to self._expSubSNPFileName.

    Batch_Gene1 rows reference Line1/Line2 and Batch_Gene2 rows reference
    Line3/Line4.  The file is opened through a context manager so that
    the handle is closed even when a write fails.
    """
    with open(self._expSubSNPFileName, "w") as expFileHandle:
        expFileHandle.write("SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n")
        # Rows of the first batch
        expFileHandle.write("Batch_Gene1_SNP_4_Line1;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;1\n")
        expFileHandle.write("Batch_Gene1_SNP_4_Line2;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n")
        expFileHandle.write("Batch_Gene1_SNP_21_Line1;A;SNP;21;CCTTAGCCATTGCTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;2\n")
        expFileHandle.write("Batch_Gene1_SNP_21_Line2;A;SNP;21;CCTAAGCCATTGCTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n")
        expFileHandle.write("Batch_Gene1_SNP_25_Line1;A;SNP;25;CCTTAGCCATTGCTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;3\n")
        expFileHandle.write("Batch_Gene1_SNP_25_Line2;A;SNP;25;CCTAAGCCATTGCTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n")
        expFileHandle.write("Batch_Gene1_SNP_36_Line1;A;SNP;36;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;1;1;Sequence;;;3\n")
        expFileHandle.write("Batch_Gene1_SNP_36_Line2;A;SNP;36;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;1;2;Sequence;;;2\n")
        expFileHandle.write("Batch_Gene1_SNP_51_Line1;A;SNP;51;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;1;1;Sequence;;;2\n")
        expFileHandle.write("Batch_Gene1_SNP_51_Line2;A;SNP;51;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;1;2;Sequence;;;4\n")

        # Rows of the second batch
        expFileHandle.write("Batch_Gene2_INS_1_Line3;A;INSERTION;1;C;TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;2;2;3;Sequence;;;8\n")
        expFileHandle.write("Batch_Gene2_INS_1_Line4;A;INSERTION;1;C;AAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;2;2;4;Sequence;;;6\n")
        expFileHandle.write("Batch_Gene2_SNP_2_Line3;A;SNP;2;C--;AGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;2;3;Sequence;;;1\n")
        expFileHandle.write("Batch_Gene2_SNP_2_Line4;A;SNP;2;CCT;AGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;2;4;Sequence;;;4\n")
        expFileHandle.write("Batch_Gene2_DEL_8_Line3;A;DELETION;8;C--TAGCCA;CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;3;2;3;Sequence;;;5\n")
        expFileHandle.write("Batch_Gene2_DEL_8_Line4;A;DELETION;8;CCTAAGCCA;CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;3;2;4;Sequence;;;7\n")
        expFileHandle.write("Batch_Gene2_SNP_19_Line3;A;SNP;19;C--TAGCCA---CTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;2;3;Sequence;;;2\n")
        expFileHandle.write("Batch_Gene2_SNP_19_Line4;A;SNP;19;CCTAAGCCATT-CTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;2;4;Sequence;;;2\n")
        expFileHandle.write("Batch_Gene2_SNP_23_Line3;A;SNP;23;C--TAGCCA---CTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;2;3;Sequence;;;3\n")
        expFileHandle.write("Batch_Gene2_SNP_23_Line4;A;SNP;23;CCTAAGCCATT-CTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;2;4;Sequence;;;2\n")
        expFileHandle.write("Batch_Gene2_SNP_34_Line3;A;SNP;34;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;2;3;Sequence;;;3\n")
        expFileHandle.write("Batch_Gene2_SNP_34_Line4;A;SNP;34;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;2;4;Sequence;;;2\n")
        expFileHandle.write("Batch_Gene2_SNP_49_Line3;A;SNP;49;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;2;3;Sequence;;;2\n")
        expFileHandle.write("Batch_Gene2_SNP_49_Line4;A;SNP;49;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;2;4;Sequence;;;4\n")
|
|
1363
|
|
def _writeExpSubSNPFileSeveralLineSeq(self):
    """Write the expected SubSNP CSV file (header + 10 SNP rows with longer 3' flanks) to self._expSubSNPFileName.

    The file is opened through a context manager so that the handle is
    closed even when a write fails.
    """
    with open(self._expSubSNPFileName, "w") as expFileHandle:
        expFileHandle.write("SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n")
        expFileHandle.write("Batch1_SNP_4_Line1;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA;1;1;1;Sequence;;;1\n")
        expFileHandle.write("Batch1_SNP_4_Line2;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATACGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n")
        expFileHandle.write("Batch1_SNP_21_Line1;A;SNP;21;CCTTAGCCATTGCTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA;1;1;1;Sequence;;;2\n")
        expFileHandle.write("Batch1_SNP_21_Line2;A;SNP;21;CCTAAGCCATTGCTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATACGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n")
        expFileHandle.write("Batch1_SNP_25_Line1;A;SNP;25;CCTTAGCCATTGCTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA;1;1;1;Sequence;;;3\n")
        expFileHandle.write("Batch1_SNP_25_Line2;A;SNP;25;CCTAAGCCATTGCTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATACGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n")
        expFileHandle.write("Batch1_SNP_36_Line1;A;SNP;36;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA;1;1;1;Sequence;;;3\n")
        expFileHandle.write("Batch1_SNP_36_Line2;A;SNP;36;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATACGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n")
        expFileHandle.write("Batch1_SNP_51_Line1;A;SNP;51;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;CGCAGTAGCCAAACCTCCACAATA;1;1;1;Sequence;;;2\n")
        expFileHandle.write("Batch1_SNP_51_Line2;A;SNP;51;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;CGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n")
|
|
1378
|
|
1379
|
|
def _writeExpAlleleFile(self):
    """Write the expected allele CSV file (header + alleles T, C, G, A) to self._expAlleleFileName.

    The file is opened through a context manager so that the handle is
    closed even when a write fails.
    """
    with open(self._expAlleleFileName, "w") as expFileHandle:
        expFileHandle.write("AlleleNumber;Value;Motif;NbCopy;Comment\n")
        expFileHandle.write("1;T;;;\n")
        expFileHandle.write("2;C;;;\n")
        expFileHandle.write("3;G;;;\n")
        expFileHandle.write("4;A;;;\n")
|
|
1388
|
|
def _writeExpAlleleFileWithSnpsAndIndels(self):
    """Write the expected allele CSV file (4 single-base alleles + 4 indel alleles) to self._expAlleleFileName.

    The file is opened through a context manager so that the handle is
    closed even when a write fails.
    """
    with open(self._expAlleleFileName, "w") as expFileHandle:
        expFileHandle.write("AlleleNumber;Value;Motif;NbCopy;Comment\n")
        expFileHandle.write("1;T;;;\n")
        expFileHandle.write("2;C;;;\n")
        expFileHandle.write("3;G;;;\n")
        expFileHandle.write("4;A;;;\n")
        expFileHandle.write("5;---;;;\n")
        expFileHandle.write("6;CT;;;\n")
        expFileHandle.write("7;TT-;;;\n")
        expFileHandle.write("8;--;;;\n")
|
|
1401
|
|
1402
|
|
def _writeExpAlleleFileSeveralBatches(self):
    """Write the expected allele CSV file for the several-batches case to self._expAlleleFileName.

    The content is a header followed by 8 alleles (4 single bases and
    4 indel values).  The file is opened through a context manager so
    that the handle is closed even when a write fails.
    """
    with open(self._expAlleleFileName, "w") as expFileHandle:
        expFileHandle.write("AlleleNumber;Value;Motif;NbCopy;Comment\n")
        expFileHandle.write("1;T;;;\n")
        expFileHandle.write("2;C;;;\n")
        expFileHandle.write("3;G;;;\n")
        expFileHandle.write("4;A;;;\n")
        expFileHandle.write("5;---;;;\n")
        expFileHandle.write("6;CT;;;\n")
        expFileHandle.write("7;TT-;;;\n")
        expFileHandle.write("8;--;;;\n")
|
|
1415
|
|
def _writeExpIndividualFile(self):
    """Write the expected individual CSV file (header + Line1 and Line2) to self._expIndividualFileName.

    The file is opened through a context manager so that the handle is
    closed even when a write fails.
    """
    with open(self._expIndividualFileName, "w") as expFileHandle:
        expFileHandle.write("IndividualNumber;IndividualName;Description;AberrAneuploide;FractionLength;DeletionLineSynthesis;UrlEarImage;TypeLine;ChromNumber;ArmChrom;DeletionBin;ScientificName;local_germplasm_name;submitter_code;local_institute;donor_institute;donor_acc_id\n")
        expFileHandle.write("1;Line1;;;;;;;;;;Arabidopsis thaliana;;;;;\n")
        expFileHandle.write("2;Line2;;;;;;;;;;Arabidopsis thaliana;;;;;\n")
|
|
1422
|
|
def _writeExpIndividualFile_different_lines_between_files(self):
    """Write the expected individual CSV file (header + Line1 to Line4) to self._expIndividualFileName.

    The file is opened through a context manager so that the handle is
    closed even when a write fails.
    """
    with open(self._expIndividualFileName, "w") as expFileHandle:
        expFileHandle.write("IndividualNumber;IndividualName;Description;AberrAneuploide;FractionLength;DeletionLineSynthesis;UrlEarImage;TypeLine;ChromNumber;ArmChrom;DeletionBin;ScientificName;local_germplasm_name;submitter_code;local_institute;donor_institute;donor_acc_id\n")
        expFileHandle.write("1;Line1;;;;;;;;;;Arabidopsis thaliana;;;;;\n")
        expFileHandle.write("2;Line2;;;;;;;;;;Arabidopsis thaliana;;;;;\n")
        expFileHandle.write("3;Line3;;;;;;;;;;Arabidopsis thaliana;;;;;\n")
        expFileHandle.write("4;Line4;;;;;;;;;;Arabidopsis thaliana;;;;;\n")
|
|
1431
|
|
def _writeExpSequenceFile(self):
    """Write the expected reference sequence files (FASTA and CSV description).

    The FASTA goes to self._expSequenceFSAFileName and the CSV to
    self._expSequenceCSVFileName.  Both handles are now closed (they were
    left open before), so the content is flushed to disk before the
    files are read back.
    """
    with open(self._expSequenceFSAFileName, "w") as SequenceFSAFileHandle:
        SequenceFSAFileHandle.write(">Sequence_de_Reference\n")
        SequenceFSAFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n")
    with open(self._expSequenceCSVFileName, "w") as SequenceCSVFileHandle:
        SequenceCSVFileHandle.write("SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference;Reference;;;;;Arabidopsis thaliana\n")
|
|
1439
|
|
def _writeExpSequenceFileSeveralLineSeq(self):
    """Write the expected reference sequence files (FASTA and CSV) for the longer reference sequence.

    The FASTA goes to self._expSequenceFSAFileName and the CSV to
    self._expSequenceCSVFileName.  Both handles are now closed (they were
    left open before), so the content is flushed to disk before the
    files are read back.
    """
    with open(self._expSequenceFSAFileName, "w") as SequenceFSAFileHandle:
        SequenceFSAFileHandle.write(">Sequence_de_Reference\n")
        SequenceFSAFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA\n")
    with open(self._expSequenceCSVFileName, "w") as SequenceCSVFileHandle:
        SequenceCSVFileHandle.write("SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference;Reference;;;;;Arabidopsis thaliana\n")
|
|
1447
|
|
def _writeExpSequenceFileWithDeletion(self):
    """Write the expected reference sequence files (FASTA and CSV) for the with-deletion case.

    The FASTA goes to self._expSequenceFSAFileName and the CSV to
    self._expSequenceCSVFileName.  Both handles are now closed (they were
    left open before), so the content is flushed to disk before the
    files are read back.
    """
    with open(self._expSequenceFSAFileName, "w") as SequenceFSAFileHandle:
        SequenceFSAFileHandle.write(">Sequence_de_Reference\n")
        SequenceFSAFileHandle.write("CAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n")
    with open(self._expSequenceCSVFileName, "w") as SequenceCSVFileHandle:
        SequenceCSVFileHandle.write("SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference;Reference;;;;;Arabidopsis thaliana\n")
|
|
1455
|
|
def _writeExpSequenceSeveralBatches(self):
    """Write the expected sequence files (FASTA and CSV) holding two reference sequences.

    The records are named Sequence_de_Reference1 and
    Sequence_de_Reference2.  The FASTA goes to
    self._expSequenceFSAFileName and the CSV to
    self._expSequenceCSVFileName.  Both handles are now closed (they were
    left open before), so the content is flushed to disk before the
    files are read back.
    """
    with open(self._expSequenceFSAFileName, "w") as SequenceFSAFileHandle:
        SequenceFSAFileHandle.write(">Sequence_de_Reference1\n")
        SequenceFSAFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n")
        SequenceFSAFileHandle.write(">Sequence_de_Reference2\n")
        SequenceFSAFileHandle.write("CAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n")
    with open(self._expSequenceCSVFileName, "w") as SequenceCSVFileHandle:
        SequenceCSVFileHandle.write("SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference1;Reference;;;;;Arabidopsis thaliana\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference2;Reference;;;;;Arabidopsis thaliana\n")
|
|
1466
|
|
def _writeExpSequenceSeveralBatchesForSameRefSeq(self):
    """Write the expected sequence files (FASTA and CSV) where both records share one name.

    Both records are named Sequence_de_Reference1.  The FASTA goes to
    self._expSequenceFSAFileName and the CSV to
    self._expSequenceCSVFileName.  Both handles are now closed (they were
    left open before), so the content is flushed to disk before the
    files are read back.
    """
    with open(self._expSequenceFSAFileName, "w") as SequenceFSAFileHandle:
        SequenceFSAFileHandle.write(">Sequence_de_Reference1\n")
        SequenceFSAFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n")
        SequenceFSAFileHandle.write(">Sequence_de_Reference1\n")
        SequenceFSAFileHandle.write("CAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n")
    with open(self._expSequenceCSVFileName, "w") as SequenceCSVFileHandle:
        SequenceCSVFileHandle.write("SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference1;Reference;;;;;Arabidopsis thaliana\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference1;Reference;;;;;Arabidopsis thaliana\n")
|
|
1477
|
|
def _writeExpBatchFile(self):
    """Write the expected batch description file (one batch record ended by '//') to self._expBatchFileName.

    The file is opened through a context manager so that the handle is
    closed even when a write fails.
    """
    with open(self._expBatchFileName, "w") as BatchFileHandle:
        BatchFileHandle.write("BatchNumber: 1\n")
        BatchFileHandle.write("BatchName: Batch1\n")
        BatchFileHandle.write("GeneName: methyltransferase\n")
        BatchFileHandle.write("Description: \n")
        BatchFileHandle.write("ContactNumber: 1\n")
        BatchFileHandle.write("ProtocolNumber: 1\n")
        BatchFileHandle.write("ThematicNumber: 1\n")
        BatchFileHandle.write("RefSeqName: Sequence_de_Reference\n")
        BatchFileHandle.write("AlignmentFileName: \n")
        BatchFileHandle.write("SeqName: \n")
        BatchFileHandle.write("//\n")
|
|
1492
|
|
def _writeExpBatchFileSeveralBatches(self):
    """Write the expected batch file for the two-batch scenario.

    Creates (or overwrites) ``self._expBatchFileName`` with two batch
    records, each made of ten ``Key: value`` lines followed by ``//``.
    Batch 1 refers to ``Gene1`` / ``Sequence_de_Reference1`` and batch 2
    to ``Gene2`` / ``Sequence_de_Reference2``.
    """
    batchLines = (
        # First batch record.
        "BatchNumber: 1\n",
        "BatchName: Batch_Gene1\n",
        "GeneName: Gene1\n",
        "Description: \n",
        "ContactNumber: 1\n",
        "ProtocolNumber: 1\n",
        "ThematicNumber: 1\n",
        "RefSeqName: Sequence_de_Reference1\n",
        "AlignmentFileName: \n",
        "SeqName: \n",
        "//\n",
        # Second batch record.
        "BatchNumber: 2\n",
        "BatchName: Batch_Gene2\n",
        "GeneName: Gene2\n",
        "Description: \n",
        "ContactNumber: 1\n",
        "ProtocolNumber: 1\n",
        "ThematicNumber: 1\n",
        "RefSeqName: Sequence_de_Reference2\n",
        "AlignmentFileName: \n",
        "SeqName: \n",
        "//\n",
    )
    # The context manager closes the handle even if a write fails.
    with open(self._expBatchFileName, "w") as batchFileHandle:
        batchFileHandle.writelines(batchLines)
|
|
1518
|
|
def _writeExpBatchFileSeveralBatchesForSameRefSeq(self):
    """Write the expected batch file when both batches share one reference.

    Creates (or overwrites) ``self._expBatchFileName`` with two batch
    records (``Gene1`` and ``Gene2``), each made of ten ``Key: value``
    lines followed by ``//``.  Both records name
    ``Sequence_de_Reference1`` as their ``RefSeqName``.

    The original closed the handle twice; a single ``with`` block now
    owns the handle.
    """
    batchLines = (
        # First batch record.
        "BatchNumber: 1\n",
        "BatchName: Batch_Gene1\n",
        "GeneName: Gene1\n",
        "Description: \n",
        "ContactNumber: 1\n",
        "ProtocolNumber: 1\n",
        "ThematicNumber: 1\n",
        "RefSeqName: Sequence_de_Reference1\n",
        "AlignmentFileName: \n",
        "SeqName: \n",
        "//\n",
        # Second batch record: same reference sequence name as the first.
        "BatchNumber: 2\n",
        "BatchName: Batch_Gene2\n",
        "GeneName: Gene2\n",
        "Description: \n",
        "ContactNumber: 1\n",
        "ProtocolNumber: 1\n",
        "ThematicNumber: 1\n",
        "RefSeqName: Sequence_de_Reference1\n",
        "AlignmentFileName: \n",
        "SeqName: \n",
        "//\n",
    )
    with open(self._expBatchFileName, "w") as batchFileHandle:
        batchFileHandle.writelines(batchLines)
|
|
1547
|
|
def _writeExpBatchLineFile(self):
    """Write the expected batch-line file for the single-batch scenario.

    Creates (or overwrites) ``self._expBatchLineFileName`` with a
    ``;``-separated header line and two rows: individuals 1 and 2, both
    in batch 1, with the other columns left empty.
    """
    batchLineRows = (
        "IndividualNumber;Pos5;Pos3;BatchNumber;Sequence\n",
        "1;;;1;\n",
        "2;;;1;\n",
    )
    # The context manager closes the handle even if a write fails.
    with open(self._expBatchLineFileName, "w") as batchLineFileHandle:
        batchLineFileHandle.writelines(batchLineRows)
|
|
1554
|
|
1555
|
|
def _writeExpBatchLineFileSeveralBatches(self):
    """Write the expected batch-line file for the two-batch scenario.

    Creates (or overwrites) ``self._expBatchLineFileName`` with a
    ``;``-separated header line and four rows: individuals 1 and 2 listed
    once for batch 1 and once for batch 2.
    """
    batchLineRows = (
        "IndividualNumber;Pos5;Pos3;BatchNumber;Sequence\n",
        "1;;;1;\n",
        "2;;;1;\n",
        "1;;;2;\n",
        "2;;;2;\n",
    )
    # The context manager closes the handle even if a write fails.
    with open(self._expBatchLineFileName, "w") as batchLineFileHandle:
        batchLineFileHandle.writelines(batchLineRows)
|
|
1564
|
|
def _writeExpBatchLineFileSeveralBatches_different_lines_between_files(self):
    """Write the expected batch-line file when each batch has its own lines.

    Creates (or overwrites) ``self._expBatchLineFileName`` with a
    ``;``-separated header line and four rows: individuals 1 and 2 in
    batch 1, individuals 3 and 4 in batch 2.
    """
    batchLineRows = (
        "IndividualNumber;Pos5;Pos3;BatchNumber;Sequence\n",
        "1;;;1;\n",
        "2;;;1;\n",
        "3;;;2;\n",
        "4;;;2;\n",
    )
    # The context manager closes the handle even if a write fails.
    with open(self._expBatchLineFileName, "w") as batchLineFileHandle:
        batchLineFileHandle.writelines(batchLineRows)
|
|
1573
|
|
def _writeInputFile(self):
    """Write the basic three-sequence multifasta input file.

    Creates (or overwrites) ``self._inFileName`` with the entries
    ``Sequence_de_Reference``, ``Line1`` and ``Line2``, one sequence line
    each.  The last sequence line is written without a trailing newline.
    """
    inputLines = (
        ">Sequence_de_Reference\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        # No "\n" here: the file deliberately ends without a final newline.
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    # The context manager closes the handle even if a write fails.
    with open(self._inFileName, "w") as inFileHandle:
        inFileHandle.writelines(inputLines)
|
|
1583
|
|
def _writeInputFileWithSnpsAndIndels(self):
    """Write a multifasta input file whose sequences contain ``-`` characters.

    Creates (or overwrites) ``self._inFileName`` with the entries
    ``Sequence_de_Reference``, ``Line1`` and ``Line2``.  The reference and
    both lines contain ``-`` at differing positions in addition to base
    differences.  The last sequence line has no trailing newline.
    """
    inputLines = (
        ">Sequence_de_Reference\n",
        "C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        # No "\n" here: the file deliberately ends without a final newline.
        "CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    # The context manager closes the handle even if a write fails.
    with open(self._inFileName, "w") as inFileHandle:
        inFileHandle.writelines(inputLines)
|
|
1593
|
|
def _writeInputFileWithSeqErrorsInRefSeq(self):
    """Write a multifasta input file with an invalid reference sequence.

    Creates (or overwrites) ``self._inFileName`` with the entries
    ``Sequence_de_Reference``, ``Line1`` and ``Line2``.  The reference
    sequence contains the digit ``7`` in place of a nucleotide.  The last
    sequence line has no trailing newline.
    """
    inputLines = (
        ">Sequence_de_Reference\n",
        # The "7" is the deliberate error in the reference sequence.
        "CCTA7GCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    # The context manager closes the handle even if a write fails.
    with open(self._inFileName, "w") as inFileHandle:
        inFileHandle.writelines(inputLines)
|
|
1603
|
|
def _writeInputFileWithSeqErrorsInOneLineSeq(self):
    """Write a multifasta input file with an invalid ``Line1`` sequence.

    Creates (or overwrites) ``self._inFileName`` with the entries
    ``Sequence_de_Reference``, ``Line1`` and ``Line2``.  The ``Line1``
    sequence contains the character ``X``.  The last sequence line has no
    trailing newline.
    """
    inputLines = (
        ">Sequence_de_Reference\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        # The "X" is the deliberate error in this line sequence.
        "CCTTAGCCATTGCTTGGTGACTATXAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    # The context manager closes the handle even if a write fails.
    with open(self._inFileName, "w") as inFileHandle:
        inFileHandle.writelines(inputLines)
|
|
1613
|
|
def _writeInputFileWithASeveralLineSeq(self):
    """Write a multifasta input file where one sequence spans several lines.

    Creates (or overwrites) ``self._inFileName`` with the entries
    ``Sequence_de_Reference``, ``Line1`` and ``Line2``.  The reference and
    ``Line1`` sequences are each on a single line, whereas the ``Line2``
    sequence is split over three lines.
    """
    inputLines = (
        ">Sequence_de_Reference\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA\n",
        ">Line2\n",
        # Embedded newlines split this single sequence over three lines.
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA\nCGCAGTAGCCAAA\nCCTCCACAATA\n",
    )
    # The context manager closes the handle even if a write fails.
    with open(self._inFileName, "w") as inFileHandle:
        inFileHandle.writelines(inputLines)
|
|
1623
|
|
1624
|
|
def _writeInputFileWithUpcaseAndLowcaseNucleotide(self):
    """Write a multifasta input file mixing upper- and lower-case letters.

    Creates (or overwrites) ``self._inFileName`` with the entries
    ``Sequence_de_Reference``, ``Line1`` and ``Line2``.  Every sequence is
    split over three lines and contains a few lower-case letters among
    the upper-case ones.
    """
    inputLines = (
        ">Sequence_de_Reference\n",
        "CCTAAGCCATTGCTTGGtGATTATGAAGgCAGTAGTCAAACCTCCACAATC\nCGCAGTAGCCAAA\nCCTCCACAATA\n",
        ">Line1\n",
        "CCTTAGCCATTGCtTGGTGACTATGAAGGcAGTAGGCAAACCTCCACAATC\nCGCAGTAGCCAAA\nCCTCCACAATA\n",
        ">Line2\n",
        "CCTAAGCCAtTGCTTGGTGACTATCaAGGCAGTAGCCAAACCTCCACAATA\nCGCAGTAGCCAAA\nCCTCCACAATA\n",
    )
    # The context manager closes the handle even if a write fails.
    with open(self._inFileName, "w") as inFileHandle:
        inFileHandle.writelines(inputLines)
|
|
1634
|
|
def _writeInputFileWith2SeqsWithTheSameName(self):
    """Write a multifasta input file in which one header appears twice.

    Creates (or overwrites) ``self._inFileName`` with the entries
    ``Sequence_de_Reference``, ``Line1``, ``Line2`` and a second
    ``Line2`` entry carrying the same sequence as the first one.
    """
    inputLines = (
        ">Sequence_de_Reference\n",
        "CCTAAGCCATTGCTTGGtGATTATGAAGgCAGTAGTCAAACCTCCACAATC\nCGCAGTAGCCAAA\nCCTCCACAATA\n",
        ">Line1\n",
        "CCTTAGCCATTGCtTGGTGACTATGAAGGcAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCAtTGCTTGGTGACTATCaAGGCAGTAGCCAAACCTCCACAATA\n",
        # Deliberate duplicate of the ">Line2" entry.
        ">Line2\n",
        "CCTAAGCCAtTGCTTGGTGACTATCaAGGCAGTAGCCAAACCTCCACAATA\n",
    )
    # The context manager closes the handle even if a write fails.
    with open(self._inFileName, "w") as inFileHandle:
        inFileHandle.writelines(inputLines)
|
|
1646
|
|
1647 def _writeInputFileBatchWithPotentialDooblons(self):
|
|
1648 inFileHandle = open(self._inFileName, "w")
|
|
1649 inFileHandle.write(">AU247387ref\n")
|
|
1650 inFileHandle.write("CACTATAGCTCCTAACATTCCTGAAGTGAAGATCACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTACGTTCTGGTTACTCTTCAATTTGGGCATGCTTAATTATCTCCTCAATTTCAATTTGGCCATGCTTAATGTTGGGTGCTTTCTTTATAGCCTGCTCACCAACATGTGATCTGTTCTTTGTATGCTCAGGTGGTTGCATGGCTTCGTTCTCTTTAGCCTTCGCTGTTTGTGGCTTTGTTATGTGACCAAGCACTTGCTATACTGTCTATTTGTTCGCAGGTGATTGCAGGTCTGTGGATCCTCTGGGTTCTTTCTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTGTGCTGCAACAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCATACGAGTTGTGAACTGATGACATCCTCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTACGAGAAGTACGAGGACAAGATCGATGCTTTTGGAGAGAAG\n")
|
|
1651 inFileHandle.write(">10102\n")
|
|
1652 inFileHandle.write("NNNtatagctcctaacattcctgaagtgaagatcacrgaggacnnggctgtcaatgttgcccgctcgctgagatatgagatcaacaggggcttygctagcttgagggcgattggNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
|
|
1653 inFileHandle.write(">10954\n")
|
|
1654 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtcaatgttgcccgctcgctgagatatgagatcaacaggggctttgctagcctgagggcgattggtcaaggccgtgacctgaagaaattcctgattgtacgt---------------------------ttaat---------------------------------------------------------------------------------------------tggttgcatggcttcgttctctttagccttcgctgtttgtggctttgttatgtgaccaagcacttgctatactgtctatttgttcgcaggtgattgcaggtctgtggatcctct---------ctgcccttgggagctgctgcaatttcctcaccttgttctacataggtaatgtgcttcgctgctacagcctgaacttg--------cagatgtgcagtaactgtacctagcattgtttacccat------------------------tctcgctttcttacNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
|
|
1655 inFileHandle.write(">ABERAVON\n")
|
|
1656 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNggtcaaggccgtgacctgaagaaattcctgattgtacgt---------------------------ttaat---------------------------------------------------------------------------------------------tggttgcatggcttcgttctctttagccttcgctgtttgtggctttgttatgtgaccNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
|
|
1657 inFileHandle.write(">CARILLON\n")
|
|
1658 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNcaacattgcccgctcgctgagatatgagatcaacaggggcttctttactttgaaggagatcggtcagggccgtgatctgaagaaattcctcattgtatgttctggttactcttcaatttgggcatgcttaat---------------------------------gttgggtgctttctttat--cctgctcaccaacatgtgatctgttctttgtatgctcaggtggttgccgg---------------------------------------------------------------------------------------------------cctctgggttctttctgttcttgggagctcttgcaacttcttgacattggcatatataggtaat------------------tttaacttgtgctgcaacacttgagttcataaccaccctag------ttgtccatacgagttgtgaactgatgacatccgttctttttcccragtgcagtcttcgtggtgctctacacggtgccagttctgtatgaNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
|
|
1659 inFileHandle.write(">CONCERTO\n")
|
|
1660 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNctttgttatgtgaccaagcacttgctatactgtctatttgttcgcaggtgattgcaggtctgtggatcctct---------ctgcccttgggagctgctgcaatttcctcaccttgttctacataggtaatgtgcttcgctgctacagcctgaacttg--------cagatgtgcagtaactgtacctagcattgtttacccat------------------------tctcgctttcttacttgcagtcttcatggttctctacactgtgccNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
|
|
1661 inFileHandle.write(">F14-13\n")
|
|
1662 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtsaatgttgcccgctcgctgagatatgagatcaacaggggctttgctagcctgagggcgattggtcaaggccgtgacctgaagaaaNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
|
|
1663 inFileHandle.write(">GAGNY\n")
|
|
1664 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNcattgcccgctcgctgagatatgagatcaacaggggcttctttactttgaaggagatyggtcagggccgtgayctgaagaaattcctsattgtaygtNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
|
|
1665 inFileHandle.write(">GREECE\n")
|
|
1666 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtsaacattgcccgctcgctgagatatgagatcaacaggggcttctttactttgaaggagatyggycagggccgtgatctgaagaaattcctcattgtatgtNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
|
|
1667 inFileHandle.write(">IMAGINE\n")
|
|
1668 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtcaatgttgcccgctcgctgagatatgagatcaacaggggctttgctagcctgagggcgattggtcaaggccgtgacctgaagaaattcctgattgtacgt---------------------------ttaat---------------------------------------------------------------------------------------------tggttgcatggcttcgttctctttagccttcgctgtttgtggctttgttatgtgaccaagcacttgctatactgtctatttgttcgcaggtgattgcaggtctgtggatcctct---------ctgcccttgggagctgctgcaatttcctcaccttgttctacataggtaatgtgcttcgctgctacagcctgaacttg--------cagatgtgcagtaactgtacctagcattgtttacccat------------------------tctcgctttcttacttgcagtcttcatggttctctacactgtgccNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
|
|
1669 inFileHandle.write(">IRELAND\n")
|
|
1670 inFileHandle.write("NNNTATAGCTCCTAACATTCCTGAAGTGACGATTCCAGAGGACACGATTGTGAACATTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTCTTTACTTTGATGGAGATTGGCCAGGGCCGTGATCTGAAGAAATTCCTCATTGTATGT---------------------------TTGTTTATCTCCTCAATTTCAATTTGGCCATGCTTAATGTTGGGTGCTTTCTGTATAGCCTGCTCACCAAGGTGTGATCTCTTCTTTGTATACACAGGTGGTTGCTGG---------------------------------------------------------------------------------------------------CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACNTTGGCATATATAGGTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
|
|
1671 inFileHandle.write(">NEMOF\n")
|
|
1672 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtcaatgttgcccgctcgctgagatatgagatcaacaggggctttgctagcctgagggcgattggtcaaggccgtgacctgaagaaattcctgattgtacgt---------------------------ttaat---------------------------------------------------------------------------------------------tggttgcatggcttcgttctctttagccttcgctgtttgtggctttgttatgtgaccaagcacttgctatactgtctatttgttcgcaggtgattgcaggtctgtggatcctct---------ctgcccttgggNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
|
|
1673 inFileHandle.write(">NEMOH\n")
|
|
1674 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtcaatgttgcccgctcgctgagatatgagatcaacaggggctttgctagcctgagggcgattggtcaaggccgtgacctgaagaaattcctgattgtacgt---------------------------ttaat---------------------------------------------------------------------------------------------tggttgcatggcttcgttctctttagccttcgctgtttgtggctttgttatgtgaccaagcacttgctatactgtctatttgttcgcaggtgattgcaggtctgtggatcctct---------ctgcccttgggagctgctgcaatttcctcaccttgttctacataggtaatgtgcttcgctgctacagcctgaacttg--------cagatgtgcagtaactgtacctagcattgtttacccat------------------------tctcgctttcttacNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
|
|
1675 inFileHandle.write(">POLAND\n")
|
|
1676 inFileHandle.write("NNNTATAGCTCCTAACATTCCTGAAGTGAAGATCACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTAYGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGTTGCATGGCTTCGTTCTCTTTAGCCTTCGCTGTTTGTGGCTTTGTTATGTGACCAAGCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
|
|
1677 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
|
|
1678 inFileHandle.write(">SPAIN\n")
|
|
1679 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtcaacattgcccgctcgctgagatatgagatcaacaggggcttctttactttgaaggagatcggtcagggccgtgatctgaagaaattcctcattgtatgttctggttactcttcaatttgggcatgcttaat---------------------------------gttgggtgctttctttat--cctgctcaccaacatgtgatctgttctttgtatgctcaggtggttgccgg---------------------------------------------------------------------------------------------------cctctgggttctttctgttcttgggagctcttgcaacttcttgacattggcatatataggtaat------------------tttaacttgtgctgcaacacttgagttcataaccaccctag------ttgtccatacgagttgtgaactgatgacatccgttctttttcccgagtgcagtcttcgtggtgctctacacggtgccagttctgtatgagaagtacgacgacaaggttgatgcttttggtgagaag\n")
|
|
1680 inFileHandle.write(">TRANSATE\n")
|
|
1681 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNcgctcgctgagatatgagatcaacaggggcttctttactttgaaggagatYggccagggtcgcgacctcaagaaattcctcattgtatgttgcttgt-ctcttcaatttcaacatgcttgat---------------------------------gttgggtgctttctttat--cctgctcaccaacatgtgatctcttctttgtatgctcaggtggttgcggg---------------------------------------------------------------------------------------------------tctctgggttctttctgttcttgggagctcttgcaacttcttgacattggcatatataggtaaK------------------tataRcttgtgctgcaacacttgagttcataaccNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
|
|
1682 inFileHandle.write(">VIGOR\n")
|
|
1683 inFileHandle.write("NNNTATAGCTCCTAACATTCCTGAAGTGAAGATCACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTACGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGTTGCATGGCTTCGTTCTCTTTAGCCTTCGCTGTTTGTGGCTTTGTTATGTGACCAAGCACTTGCTATACTGTCTATTTGTTCGCAGGTGATTGCAGGTCTGTGGATCCTCT---------CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTACGAGAAGTACGAGGACAAGATCGATGCTTTTGGAGAGAAG\n")
|
|
1684 inFileHandle.close()
|
|
1685
|
|
def _writeRealExpAlleleFile(self):
    """Write the expected allele file for the real-data scenario.

    Creates (or overwrites) ``self._expAlleleFileName`` with a
    ``;``-separated header line and five allele rows numbered 1 to 5 with
    the values ``G``, ``T``, ``A``, ``C`` and ``-``; the remaining columns
    are left empty.
    """
    alleleRows = (
        "AlleleNumber;Value;Motif;NbCopy;Comment\n",
        "1;G;;;\n",
        "2;T;;;\n",
        "3;A;;;\n",
        "4;C;;;\n",
        "5;-;;;\n",
    )
    # The context manager closes the handle even if a write fails.
    with open(self._expAlleleFileName, "w") as expFileHandle:
        expFileHandle.writelines(alleleRows)
|
|
1695
|
|
def _writeRealExpSequenceCSVFile(self):
    """Write the expected sequence CSV file for the real-data scenario.

    Creates (or overwrites) ``self._expSequenceCSVFileName`` with a
    ``;``-separated header line and a single row for the reference
    sequence ``PpHDZ31_ref``.
    """
    csvRows = (
        "SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n",
        "PpHDZ31_ref;Reference;;;;;Pinus pinaster\n",
    )
    # The handle is named after the CSV file it writes (the original local
    # was called "...FSA..." although it targeted the CSV file).
    with open(self._expSequenceCSVFileName, "w") as sequenceCSVFileHandle:
        sequenceCSVFileHandle.writelines(csvRows)
|
|
1701
|
|
def _writeRealExpBatchFile(self):
    """Write the expected batch file for the real-data scenario.

    Creates (or overwrites) ``self._expBatchFileName`` with one batch
    record (``INRA_Pinus_pinaster_HDZ31-1`` / gene ``PpHDZ31``): ten
    ``Key: value`` lines followed by the ``//`` line.
    """
    batchLines = (
        "BatchNumber: 1\n",
        "BatchName: INRA_Pinus_pinaster_HDZ31-1\n",
        "GeneName: PpHDZ31\n",
        "Description: \n",
        "ContactNumber: 1\n",
        "ProtocolNumber: 1\n",
        "ThematicNumber: 1\n",
        "RefSeqName: PpHDZ31_ref\n",
        "AlignmentFileName: \n",
        "SeqName: \n",
        "//\n",
    )
    # The context manager closes the handle even if a write fails.
    with open(self._expBatchFileName, "w") as batchFileHandle:
        batchFileHandle.writelines(batchLines)
|
|
1716
|
|
1717
|
|
def _writeInputFileSeveralBatches(self):
    """Write two multifasta input files into the several-batches directory.

    Creates ``self._inputDirSeveralBatches`` when
    ``FileUtils.isRessourceExists`` reports it missing, then creates (or
    overwrites) inside it:

    * ``Gene1.fasta`` -- ``Sequence_de_Reference1``, ``Line1``, ``Line2``.
    * ``Gene2.fasta`` -- ``Sequence_de_Reference2``, ``Line1``, ``Line2``,
      with ``-`` characters in the sequences.

    Both files use the same line names; each file ends without a trailing
    newline.
    """
    if not FileUtils.isRessourceExists(self._inputDirSeveralBatches):
        os.mkdir(self._inputDirSeveralBatches)

    gene1Lines = (
        ">Sequence_de_Reference1\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    gene2Lines = (
        ">Sequence_de_Reference2\n",
        "C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )

    # Context managers close each handle even if a write fails.
    with open(self._inputDirSeveralBatches + "/Gene1.fasta", "w") as gene1FileHandle:
        gene1FileHandle.writelines(gene1Lines)

    with open(self._inputDirSeveralBatches + "/Gene2.fasta", "w") as gene2FileHandle:
        gene2FileHandle.writelines(gene2Lines)
|
|
1739
|
|
def _writeInputFileSeveralBatches_different_lines_between_files(self):
    """Write two multifasta input files that use different line names.

    Creates ``self._inputDirSeveralBatches`` when
    ``FileUtils.isRessourceExists`` reports it missing, then creates (or
    overwrites) inside it:

    * ``Gene1.fasta`` -- ``Sequence_de_Reference1``, ``Line1``, ``Line2``.
    * ``Gene2.fasta`` -- ``Sequence_de_Reference2``, ``Line3``, ``Line4``,
      with ``-`` characters in the sequences.

    Each file ends without a trailing newline.
    """
    if not FileUtils.isRessourceExists(self._inputDirSeveralBatches):
        os.mkdir(self._inputDirSeveralBatches)

    gene1Lines = (
        ">Sequence_de_Reference1\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    gene2Lines = (
        ">Sequence_de_Reference2\n",
        "C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line3\n",
        "C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line4\n",
        "CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )

    # Context managers close each handle even if a write fails.
    with open(self._inputDirSeveralBatches + "/Gene1.fasta", "w") as gene1FileHandle:
        gene1FileHandle.writelines(gene1Lines)

    with open(self._inputDirSeveralBatches + "/Gene2.fasta", "w") as gene2FileHandle:
        gene2FileHandle.writelines(gene2Lines)
|
|
1761
|
|
def _writeInputFileSeveralBatches_different_lines_and_same_refseq_between_files(self):
    """Write two multifasta input files sharing one reference sequence name.

    Creates ``self._inputDirSeveralBatches`` when
    ``FileUtils.isRessourceExists`` reports it missing, then creates (or
    overwrites) inside it:

    * ``Gene1.fasta`` -- ``Sequence_de_Reference1``, ``Line1``, ``Line2``.
    * ``Gene2.fasta`` -- ``Sequence_de_Reference1`` again (with a
      different sequence containing ``-`` characters), ``Line3``,
      ``Line4``.

    Each file ends without a trailing newline.
    """
    if not FileUtils.isRessourceExists(self._inputDirSeveralBatches):
        os.mkdir(self._inputDirSeveralBatches)

    gene1Lines = (
        ">Sequence_de_Reference1\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    gene2Lines = (
        # Same header as in Gene1.fasta: that is the point of this fixture.
        ">Sequence_de_Reference1\n",
        "C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line3\n",
        "C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line4\n",
        "CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )

    # Context managers close each handle even if a write fails.
    with open(self._inputDirSeveralBatches + "/Gene1.fasta", "w") as gene1FileHandle:
        gene1FileHandle.writelines(gene1Lines)

    with open(self._inputDirSeveralBatches + "/Gene2.fasta", "w") as gene2FileHandle:
        gene2FileHandle.writelines(gene2Lines)
|
|
1783
|
|
1784
|
|
# Run the unittest command-line runner when this file is executed as a
# script rather than imported.
if __name__ == "__main__":
    unittest.main()