comparison commons/core/parsing/test/Test_Multifasta2SNPFile.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
comparison
equal deleted inserted replaced
5:ea3082881bf8 6:769e306b7933
1 import os
2 import shutil
3 import unittest
4 from commons.core.utils.FileUtils import FileUtils
5 from commons.core.parsing.Multifasta2SNPFile import Multifasta2SNPFile
6 from commons.core.parsing.Multifasta2SNPFile import ReferenceBioseqAndLinesBioseqDBWrapper
7 from commons.core.seq.Bioseq import Bioseq
8 from commons.core.seq.BioseqDB import BioseqDB
9 from smac_pipe.tests.Utils4Test import Utils4Test
10
11
12 class Test_Multifasta2SNPFile(unittest.TestCase):
13 # TODO TEST LOGFILE
def setUp(self):
    """Move into the test directory and define the file names used by the tests.

    Expected-file names are absolute paths below $REPET_PATH; observed-file
    names and the "data/..." names are relative to the test directory that
    becomes the current working directory here.

    Raises KeyError if the REPET_PATH environment variable is not set.
    """
    repetPath = os.environ["REPET_PATH"]
    os.chdir("%s/commons/core/parsing/test/" % repetPath)
    self._inFileName = "multifasta_input.fasta"

    # Expected files (absolute paths under $REPET_PATH).
    self._expSubSNPFileName = "%s/commons/core/parsing/test/expSubSNP.csv" % repetPath
    self._expAlleleFileName = "%s/commons/core/parsing/test/expAllele.csv" % repetPath
    self._expIndividualFileName = "%s/commons/core/parsing/test/expIndividual.csv" % repetPath
    self._expSequenceFSAFileName = "%s/commons/core/parsing/test/expSequences.fsa" % repetPath
    self._expSequenceCSVFileName = "%s/commons/core/parsing/test/expSequences.csv" % repetPath
    self._expBatchFileName = "%s/commons/core/parsing/test/expBatch.txt" % repetPath
    self._expBatchLineFileName = "%s/commons/core/parsing/test/expBatchLine.csv" % repetPath

    # Input and expected files of the "real input" test, relative to the cwd.
    self._realInputFileName = "data/real_multifasta_input.fasta"
    self._realExpSubSNPFileName = "data/realExpSubSNP.csv"
    self._realExpSequenceFSAFileName = "data/realExpSequences.fsa"
    self._realExpBatchLineFileName = "data/realExpBatchLine.csv"
    self._realExpIndividualFileName = "data/realExpIndividual.csv"

    # Input directory of the runSeveralBatches tests.
    self._inputDirSeveralBatches = "%s/commons/core/parsing/test/severalBatchDir" % repetPath

    # Observed output files, relative to the cwd.
    self._obsSubSNPFileName = "SubSNP.csv"
    self._obsAlleleFileName = "Allele.csv"
    self._obsIndividualFileName = "Individual.csv"
    self._obsSequenceFSAFileName = "Sequences.fsa"
    self._obsSequenceCSVFileName = "Sequences.csv"
    self._obsBatchFileName = "Batch.txt"
    self._obsBatchLineFileName = "BatchLine.csv"

    self._fileUtils = FileUtils()
44
def tearDown(self):
    """Return to the test directory and delete whatever the test left behind.

    Each candidate file is removed only if it exists; the several-batches
    input directory is removed recursively if it exists.
    """
    os.chdir("%s/commons/core/parsing/test/" % os.environ["REPET_PATH"])
    logFileName = "multifasta2SNP.log"

    # Same files, in the same order, as the former chain of if/remove pairs.
    lCandidateFiles = [
        self._inFileName,
        self._obsSubSNPFileName,
        self._obsSubSNPFileName + "_filtered",
        self._obsAlleleFileName,
        self._obsIndividualFileName,
        self._obsSequenceFSAFileName,
        self._obsSequenceCSVFileName,
        self._obsBatchFileName,
        self._obsBatchLineFileName,
        self._expSubSNPFileName,
        self._realExpSubSNPFileName + "_filtered",
        self._expAlleleFileName,
        self._expIndividualFileName,
        self._expSequenceFSAFileName,
        self._expSequenceCSVFileName,
        self._expBatchFileName,
        self._expBatchLineFileName,
        logFileName,
    ]
    for fileName in lCandidateFiles:
        if self._fileUtils.isRessourceExists(fileName):
            os.remove(fileName)

    if self._fileUtils.isRessourceExists(self._inputDirSeveralBatches):
        shutil.rmtree(self._inputDirSeveralBatches)
88
89
def test_runOneBatch(self):
    """Run one batch on the generated input and compare all seven output files."""
    self._writeInputFile()
    self._writeExpSubSNPFile()
    self._writeExpAlleleFile()
    self._writeExpIndividualFile()
    self._writeExpSequenceFile()
    self._writeExpBatchFile()
    self._writeExpBatchLineFile()

    converter = Multifasta2SNPFile("Arabidopsis thaliana", "Batch1", "methyltransferase")
    converter.runOneBatch(self._inFileName)

    # (expected, observed) pairs, checked in this order.
    lExpAndObsFiles = [
        (self._expAlleleFileName, self._obsAlleleFileName),
        (self._expIndividualFileName, self._obsIndividualFileName),
        (self._expSequenceFSAFileName, self._obsSequenceFSAFileName),
        (self._expSequenceCSVFileName, self._obsSequenceCSVFileName),
        (self._expBatchFileName, self._obsBatchFileName),
        (self._expBatchLineFileName, self._obsBatchLineFileName),
        (self._expSubSNPFileName, self._obsSubSNPFileName),
    ]
    for expFileName, obsFileName in lExpAndObsFiles:
        self.assertTrue(FileUtils.isRessourceExists(obsFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
121
def test_runOneBatch_with_a_real_input_file(self):
    """Run one batch on the real input file and compare all seven output files.

    The allele, sequence-CSV and batch expectations are written by the
    _writeRealExp* helpers; the remaining expectations are the "data/real..."
    file names.
    """
    self._writeRealExpAlleleFile()
    self._writeRealExpSequenceCSVFile()
    self._writeRealExpBatchFile()

    converter = Multifasta2SNPFile("Pinus pinaster", "INRA_Pinus_pinaster_HDZ31-1", "PpHDZ31")
    converter.runOneBatch(self._realInputFileName)

    # (expected, observed) pairs, checked in this order.
    lExpAndObsFiles = [
        (self._realExpIndividualFileName, self._obsIndividualFileName),
        (self._realExpSequenceFSAFileName, self._obsSequenceFSAFileName),
        (self._expSequenceCSVFileName, self._obsSequenceCSVFileName),
        (self._expBatchFileName, self._obsBatchFileName),
        (self._realExpBatchLineFileName, self._obsBatchLineFileName),
        (self._expAlleleFileName, self._obsAlleleFileName),
        (self._realExpSubSNPFileName, self._obsSubSNPFileName),
    ]
    for expFileName, obsFileName in lExpAndObsFiles:
        self.assertTrue(FileUtils.isRessourceExists(obsFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
150
def test_runOneBatch_with_errors_in_refSeq(self):
    """Expect runOneBatch to raise on the input written with reference-sequence errors."""
    self._writeInputFileWithSeqErrorsInRefSeq()
    taxon = "Arabidopsis thaliana"
    batchName = "Batch1"
    gene = "methyltransferase"
    converter = Multifasta2SNPFile(taxon, batchName, gene)
    # NOTE(review): the other tests call runOneBatch with a single argument;
    # confirm the second argument is accepted, otherwise a TypeError alone
    # would satisfy this assertion.
    self.assertRaises(Exception, converter.runOneBatch, self._inFileName, self._obsSubSNPFileName)
155
def test_runOneBatch_with_errors_in_lineSeq(self):
    """Expect runOneBatch to raise on the input written with errors in one line sequence."""
    self._writeInputFileWithSeqErrorsInOneLineSeq()
    taxon = "Arabidopsis thaliana"
    batchName = "Batch1"
    gene = "methyltransferase"
    converter = Multifasta2SNPFile(taxon, batchName, gene)
    # NOTE(review): the other tests call runOneBatch with a single argument;
    # confirm the second argument is accepted, otherwise a TypeError alone
    # would satisfy this assertion.
    self.assertRaises(Exception, converter.runOneBatch, self._inFileName, self._obsSubSNPFileName)
160
def test_runOneBatch_with_a_several_lineSeq(self):
    """Run one batch on the "several line seq" input and compare all seven output files."""
    self._writeInputFileWithASeveralLineSeq()
    self._writeExpSubSNPFileSeveralLineSeq()
    self._writeExpAlleleFile()
    self._writeExpIndividualFile()
    self._writeExpSequenceFileSeveralLineSeq()
    self._writeExpBatchFile()
    self._writeExpBatchLineFile()

    converter = Multifasta2SNPFile("Arabidopsis thaliana", "Batch1", "methyltransferase")
    converter.runOneBatch(self._inFileName)

    # (expected, observed) pairs, checked in this order.
    lExpAndObsFiles = [
        (self._expSubSNPFileName, self._obsSubSNPFileName),
        (self._expAlleleFileName, self._obsAlleleFileName),
        (self._expIndividualFileName, self._obsIndividualFileName),
        (self._expSequenceFSAFileName, self._obsSequenceFSAFileName),
        (self._expSequenceCSVFileName, self._obsSequenceCSVFileName),
        (self._expBatchFileName, self._obsBatchFileName),
        (self._expBatchLineFileName, self._obsBatchLineFileName),
    ]
    for expFileName, obsFileName in lExpAndObsFiles:
        self.assertTrue(FileUtils.isRessourceExists(obsFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
193
def test_runOneBatch_with_2_seqs_with_the_same_name(self):
    """Expect runOneBatch to exit (SystemExit) when two sequences share a name."""
    self._writeInputFileWith2SeqsWithTheSameName()
    taxon = "Arabidopsis thaliana"
    batchName = "batch1"
    gene = "methyltransferase"
    converter = Multifasta2SNPFile(taxon, batchName, gene)

    # Fails if SystemExit is not raised; any other exception propagates.
    self.assertRaises(SystemExit, converter.runOneBatch, self._inFileName)
208
def test_runOneBatch_with_indels_and_snps(self):
    """Run one batch on the SNPs-and-indels input and compare all seven output files."""
    self._writeInputFileWithSnpsAndIndels()
    self._writeExpSubSNPFileWithSnpsAndIndels()
    self._writeExpAlleleFileWithSnpsAndIndels()
    self._writeExpIndividualFile()
    self._writeExpSequenceFileWithDeletion()
    self._writeExpBatchFile()
    self._writeExpBatchLineFile()

    taxon = "Arabidopsis thaliana"
    batchName = "Batch1"
    gene = "methyltransferase"
    converter = Multifasta2SNPFile(taxon, batchName, gene)
    converter.runOneBatch(self._inFileName)

    # (expected, observed) pairs, checked in this order.
    lExpAndObsFiles = [
        (self._expIndividualFileName, self._obsIndividualFileName),
        (self._expSequenceFSAFileName, self._obsSequenceFSAFileName),
        (self._expSequenceCSVFileName, self._obsSequenceCSVFileName),
        (self._expBatchFileName, self._obsBatchFileName),
        (self._expBatchLineFileName, self._obsBatchLineFileName),
        (self._expAlleleFileName, self._obsAlleleFileName),
        (self._expSubSNPFileName, self._obsSubSNPFileName),
    ]
    for expFileName, obsFileName in lExpAndObsFiles:
        self.assertTrue(FileUtils.isRessourceExists(obsFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
244
def test_runOneBatchWithPotentialDooblons(self):
    """Run one batch on the potential-duplicates input and compare the SubSNP file.

    Columns 8, 9 and 13 are stripped in turn from both the expected and the
    observed file before comparison; each call to
    Utils4Test.removeOneSpecifiedColumn is followed by a read of the same
    name with one more "_filtered" suffix.
    """
    self._writeInputFileBatchWithPotentialDooblons()

    taxon = "Arabidopsis thaliana"
    batchName = "Batch_AU247387"
    gene = "methyltransferase"
    converter = Multifasta2SNPFile(taxon, batchName, gene)
    converter.runOneBatch(self._inFileName)
    self.assertTrue(FileUtils.isRessourceExists(self._obsSubSNPFileName))

    expSubSNPFile = "data/ExpPotDooblonsSubSNP.csv"

    # Strip one column per pass; the suffix grows by "_filtered" each time.
    suffix = ""
    for columnNumber in (8, 9, 13):
        Utils4Test.removeOneSpecifiedColumn(expSubSNPFile + suffix, ";", columnNumber)
        Utils4Test.removeOneSpecifiedColumn(self._obsSubSNPFileName + suffix, ";", columnNumber)
        suffix = suffix + "_filtered"

    comparableExpSubSNPFile = expSubSNPFile + "_filtered_filtered_filtered"
    comparableObsSubSNPFile = self._obsSubSNPFileName + "_filtered_filtered_filtered"

    self.assertTrue(FileUtils.isRessourceExists(comparableExpSubSNPFile))
    self.assertTrue(FileUtils.isRessourceExists(comparableObsSubSNPFile))
    self.assertTrue(FileUtils.are2FilesIdentical(comparableExpSubSNPFile, comparableObsSubSNPFile))

    # Remove the intermediate files; not reached if an assertion above fails.
    lIntermediateFiles = [
        self._obsSubSNPFileName + "_filtered",
        expSubSNPFile + "_filtered",
        self._obsSubSNPFileName + "_filtered_filtered",
        expSubSNPFile + "_filtered_filtered",
        comparableExpSubSNPFile,
        comparableObsSubSNPFile,
    ]
    for fileName in lIntermediateFiles:
        if self._fileUtils.isRessourceExists(fileName):
            os.remove(fileName)
287
def test_runSeveralBatches(self):
    """Run several batches from the input directory and compare all seven output files.

    The observed files are looked up inside the several-batches input directory.
    """
    self._writeInputFileSeveralBatches()
    self._writeExpSubSNPFileSeveralBatches()
    self._writeExpAlleleFileSeveralBatches()
    self._writeExpIndividualFile()
    self._writeExpSequenceSeveralBatches()
    self._writeExpBatchFileSeveralBatches()
    self._writeExpBatchLineFileSeveralBatches()

    converter = Multifasta2SNPFile("Arabidopsis thaliana")
    converter.runSeveralBatches(self._inputDirSeveralBatches)

    # (expected path, observed file name) pairs, checked in this order.
    lExpAndObsFiles = [
        (self._expAlleleFileName, self._obsAlleleFileName),
        (self._expIndividualFileName, self._obsIndividualFileName),
        (self._expSequenceFSAFileName, self._obsSequenceFSAFileName),
        (self._expSequenceCSVFileName, self._obsSequenceCSVFileName),
        (self._expBatchFileName, self._obsBatchFileName),
        (self._expBatchLineFileName, self._obsBatchLineFileName),
        (self._expSubSNPFileName, self._obsSubSNPFileName),
    ]
    for expFileName, obsFileName in lExpAndObsFiles:
        obsFilePath = self._inputDirSeveralBatches + "/" + obsFileName
        self.assertTrue(FileUtils.isRessourceExists(obsFilePath))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFilePath))
319
def test_runSeveralBatches_different_lines_between_files(self):
    """Run several batches on the "different lines between files" input and compare outputs.

    The observed files are looked up inside the several-batches input directory.
    """
    self._writeInputFileSeveralBatches_different_lines_between_files()
    self._writeExpSubSNPFileSeveralBatches_different_lines_between_files()
    self._writeExpAlleleFileSeveralBatches()
    self._writeExpIndividualFile_different_lines_between_files()
    self._writeExpSequenceSeveralBatches()
    self._writeExpBatchFileSeveralBatches()
    self._writeExpBatchLineFileSeveralBatches_different_lines_between_files()

    converter = Multifasta2SNPFile("Arabidopsis thaliana")
    converter.runSeveralBatches(self._inputDirSeveralBatches)

    # (expected path, observed file name) pairs, checked in this order.
    lExpAndObsFiles = [
        (self._expAlleleFileName, self._obsAlleleFileName),
        (self._expIndividualFileName, self._obsIndividualFileName),
        (self._expSequenceFSAFileName, self._obsSequenceFSAFileName),
        (self._expSequenceCSVFileName, self._obsSequenceCSVFileName),
        (self._expBatchFileName, self._obsBatchFileName),
        (self._expBatchLineFileName, self._obsBatchLineFileName),
        (self._expSubSNPFileName, self._obsSubSNPFileName),
    ]
    for expFileName, obsFileName in lExpAndObsFiles:
        obsFilePath = self._inputDirSeveralBatches + "/" + obsFileName
        self.assertTrue(FileUtils.isRessourceExists(obsFilePath))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFilePath))
351
def test_runSeveralBatches_different_lines_and_same_refseq_between_files(self):
    """Expect runSeveralBatches to raise on the "same refseq between files" input.

    The input and expected files are written by the _write* helpers
    (presumably several batches sharing one reference sequence -- confirm
    against the helper). The test fails if runSeveralBatches returns
    without raising an Exception.
    """
    self._writeInputFileSeveralBatches_different_lines_and_same_refseq_between_files()
    self._writeExpSubSNPFileSeveralBatches_different_lines_between_files()
    self._writeExpAlleleFileSeveralBatches()
    self._writeExpIndividualFile_different_lines_between_files()
    self._writeExpSequenceSeveralBatchesForSameRefSeq()
    self._writeExpBatchFileSeveralBatchesForSameRefSeq()
    self._writeExpBatchLineFileSeveralBatches_different_lines_between_files()

    multifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana")
    # The former try/except form could never fail: with no exception it
    # passed silently, and assertRaises(Exception, e) merely called the
    # exception instance, whose TypeError satisfied the assertion. It also
    # relied on the Python-2-only "except Exception, e" syntax.
    self.assertRaises(Exception, multifasta2SNPFile.runSeveralBatches, self._inputDirSeveralBatches)
366
def test_detectSNPAndIndels(self):
    """Check detectSNPsAndIndels on two lines that differ from the reference by substitutions only."""
    refBioseq = Bioseq()
    alignedBioseqDB = BioseqDB()
    batchName = "batch1"
    taxon = "Arabidopsis thaliana"
    gene = "methyltransferase"
    converter = Multifasta2SNPFile(taxon, batchName, gene)
    refBioseq.sequence = "ATTCGCGTATGCGTATGCTT"
    refBioseq.header = "reference"

    lLines = [Bioseq( "line1", "ATCCGCGTATGCGTATGATT" ),
              Bioseq( "line2", "ATTCGTGTATGCGTATGGTT" )]
    alignedBioseqDB.setData( lLines )

    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, converter._logFile, self._inFileName)
    converter._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"}
    converter.detectSNPsAndIndels(converter._wrapper)

    def expSubSNP(nameSuffix, position, lineNumber, alleleNumber, flank5, flank3):
        # Build one expected sub-SNP record; every record here is a length-1 SNP.
        return {'subSNPName': batchName + nameSuffix, 'position': position, 'lineName': lineNumber,
                'allele': alleleNumber, '5flank': flank5, '3flank': flank3, 'batchNumber': 1,
                'confidenceValue' : "A", 'type' : "SNP", 'length': 1}

    dExpAllele = {'C': 1, 'A': 2, 'T': 3, 'G': 4 }
    lExpSNP = [expSubSNP("_SNP_3_line1", 3, 1, 1, "AT", "CGCGTATGCGTATGATT"),
               expSubSNP("_SNP_3_line2", 3, 2, 3, "AT", "CGTGTATGCGTATGGTT"),
               expSubSNP("_SNP_6_line2", 6, 2, 3, "ATTCG", "GTATGCGTATGGTT"),
               expSubSNP("_SNP_6_line1", 6, 1, 1, "ATCCG", "GTATGCGTATGATT"),
               expSubSNP("_SNP_18_line1", 18, 1, 2, "ATCCGCGTATGCGTATG", "TT"),
               expSubSNP("_SNP_18_line2", 18, 2, 4, "ATTCGTGTATGCGTATG", "TT")]
    lExpIndividual = [{'individualNumber': number, 'individualName': name, 'scientificName': "Arabidopsis thaliana"}
                      for number, name in [(1, "line1"), (2, "line2")]]

    self.assertEquals(converter._sortSubSNPResultByBatchPositionAndLineName(lExpSNP), converter._lSubSNPFileResults)
    self.assertEquals(dExpAllele, converter._dAlleleFileResults)
    self.assertEquals(lExpIndividual, converter._lIndividualFileResults)
399
def test_detectSNPAndIndels_no_polym(self):
    """Check that no sub-SNP is reported when both lines equal the reference."""
    refBioseq = Bioseq()
    alignedBioseqDB = BioseqDB()
    batchName = "batch1"
    taxon = "Arabidopsis thaliana"
    gene = "methyltransferase"
    converter = Multifasta2SNPFile(taxon, batchName, gene)
    refBioseq.sequence = "ATTCGCGTATGCGTATGCTT"
    refBioseq.header = "reference"

    lLines = [Bioseq( "line1", "ATTCGCGTATGCGTATGCTT" ),
              Bioseq( "line2", "ATTCGCGTATGCGTATGCTT" )]
    alignedBioseqDB.setData( lLines )

    wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, converter._logFile, self._inFileName)
    converter.detectSNPsAndIndels(wrapper)

    self.assertEquals([], converter._lSubSNPFileResults)
422
def test_detectSNPAndIndels_with_only_dels(self):
    """Check detectSNPsAndIndels on two lines that only carry gaps relative to the reference."""
    refBioseq = Bioseq()
    alignedBioseqDB = BioseqDB()
    batchName = "batch1"
    taxon = "Arabidopsis thaliana"
    gene = "methyltransferase"
    converter = Multifasta2SNPFile(taxon, batchName, gene)
    refBioseq.sequence = "ATTACCGAA"
    refBioseq.header = "reference"

    lLines = [Bioseq( "line1", "A--ACCGAA" ),
              Bioseq( "line2", "---ACCGAA" )]
    alignedBioseqDB.setData( lLines )

    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, converter._logFile, self._inFileName)
    converter._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"}
    converter.detectSNPsAndIndels(converter._wrapper)

    def expDeletion(nameSuffix, position, lineNumber, alleleNumber, flank5, flank3):
        # Build one expected sub-SNP record; every record here is a length-3 deletion.
        return {'subSNPName': batchName + nameSuffix, 'position': position, 'lineName': lineNumber,
                'allele': alleleNumber, '5flank': flank5, '3flank': flank3, 'batchNumber': 1,
                'confidenceValue' : "A", 'type' : "DELETION", 'length': 3}

    dExpAllele = {'A--': 1, '---': 2}
    lExpSNP = [expDeletion("_DEL_1_line2", 1, 2, 2, "", "ACCGAA"),
               expDeletion("_DEL_1_line1", 1, 1, 1, "", "ACCGAA")]
    lExpIndividual = [{'individualNumber': number, 'individualName': name, 'scientificName': "Arabidopsis thaliana"}
                      for number, name in [(1, "line1"), (2, "line2")]]

    self.assertEquals(dExpAllele, converter._dAlleleFileResults)
    self.assertEquals(converter._sortSubSNPResultByBatchPositionAndLineName(lExpSNP), converter._lSubSNPFileResults)
    self.assertEquals(lExpIndividual, converter._lIndividualFileResults)
451
def test_detectSNPAndIndels_with_dels_and_snps(self):
    """Check detectSNPsAndIndels on four lines mixing gaps and one substitution."""
    refBioseq = Bioseq()
    alignedBioseqDB = BioseqDB()
    batchName = "batch1"
    taxon = "Arabidopsis thaliana"
    gene = "methyltransferase"
    converter = Multifasta2SNPFile(taxon, batchName, gene)
    refBioseq.sequence = "ATTACCGAA"
    refBioseq.header = "reference"

    lLines = [Bioseq( "line1", "A--ACCGAA" ),
              Bioseq( "line2", "---ACCGAA" ),
              Bioseq( "line3", "ATTACCGGA" ),
              Bioseq( "line4", "----CCGAA" )]
    alignedBioseqDB.setData( lLines )

    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, converter._logFile, self._inFileName)
    converter._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"}
    converter.detectSNPsAndIndels(converter._wrapper)

    def expSubSNP(nameSuffix, position, lineNumber, alleleNumber, flank5, flank3, polymType, length):
        # Build one expected sub-SNP record from the fields that vary between records.
        return {'subSNPName': batchName + nameSuffix, 'position': position, 'lineName': lineNumber,
                'allele': alleleNumber, '5flank': flank5, '3flank': flank3, 'batchNumber': 1,
                'confidenceValue' : "A", 'type' : polymType, 'length': length}

    dExpAllele = {'G': 1, 'A--A': 2, '---A': 3, '----': 4, 'ATTA': 5, 'A': 6}
    lExpSNP = [expSubSNP("_DEL_1_line2", 1, 2, 3, "", "CCGAA", "DELETION", 4),
               expSubSNP("_DEL_1_line1", 1, 1, 2, "", "CCGAA", "DELETION", 4),
               expSubSNP("_SNP_8_line3", 8, 3, 1, "ATTACCG", "A", "SNP", 1),
               expSubSNP("_SNP_8_line1", 8, 1, 6, "A--ACCG", "A", "SNP", 1),
               expSubSNP("_SNP_8_line2", 8, 2, 6, "---ACCG", "A", "SNP", 1),
               expSubSNP("_SNP_8_line4", 8, 4, 6, "----CCG", "A", "SNP", 1),
               expSubSNP("_DEL_1_line4", 1, 4, 4, "", "CCGAA", "DELETION", 4),
               expSubSNP("_DEL_1_line3", 1, 3, 5, "", "CCGGA", "DELETION", 4)]
    lExpIndividual = [{'individualNumber': number, 'individualName': name, 'scientificName': "Arabidopsis thaliana"}
                      for number, name in [(1, "line1"), (2, "line2"), (3, "line3"), (4, "line4")]]

    self.assertEquals(dExpAllele, converter._dAlleleFileResults)
    self.assertEquals(converter._sortSubSNPResultByBatchPositionAndLineName(lExpSNP), converter._lSubSNPFileResults)
    self.assertEquals(lExpIndividual, converter._lIndividualFileResults)
490
def test_detectSNPAndIndels_with_only_inserts(self):
    """Check detectSNPsAndIndels when the reference carries a gap that some lines fill."""
    refBioseq = Bioseq()
    alignedBioseqDB = BioseqDB()
    batchName = "batch1"
    taxon = "Arabidopsis thaliana"
    gene = "methyltransferase"
    converter = Multifasta2SNPFile(taxon, batchName, gene)
    refBioseq.sequence = "A--ACCGAA"
    refBioseq.header = "reference"

    lLines = [Bioseq( "line1", "A--ACCGAA" ),
              Bioseq( "line2", "AG-ACCGAA" ),
              Bioseq( "line3", "ATTACCGAA" )]
    alignedBioseqDB.setData( lLines )

    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, converter._logFile, self._inFileName)
    converter._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"}
    converter.detectSNPsAndIndels(converter._wrapper)

    def expInsertion(nameSuffix, position, lineNumber, alleleNumber, flank5, flank3):
        # Build one expected sub-SNP record; every record here is a length-2 insertion.
        return {'subSNPName': batchName + nameSuffix, 'position': position, 'lineName': lineNumber,
                'allele': alleleNumber, '5flank': flank5, '3flank': flank3, 'batchNumber': 1,
                'confidenceValue' : "A", 'type' : "INSERTION", 'length': 2}

    dExpAllele = {'G-': 1, 'TT': 2, '--': 3}
    lExpSNP = [expInsertion("_INS_1_line2", 1, 2, 1, "A", "ACCGAA"),
               expInsertion("_INS_1_line3", 1, 3, 2, "A", "ACCGAA"),
               expInsertion("_INS_1_line1", 1, 1, 3, "A", "ACCGAA")]
    lExpIndividual = [{'individualNumber': number, 'individualName': name, 'scientificName': "Arabidopsis thaliana"}
                      for number, name in [(1, "line1"), (2, "line2"), (3, "line3")]]

    self.assertEquals(dExpAllele, converter._dAlleleFileResults)
    self.assertEquals(converter._sortSubSNPResultByBatchPositionAndLineName(lExpSNP), converter._lSubSNPFileResults)
    self.assertEquals(lExpIndividual, converter._lIndividualFileResults)
522
def test_detectSNPAndIndels_with_snps_and_inserts(self):
    """Check detectSNPsAndIndels when lines fill a reference gap and one carries a substitution."""
    refBioseq = Bioseq()
    alignedBioseqDB = BioseqDB()
    batchName = "batch1"
    taxon = "Arabidopsis thaliana"
    gene = "methyltransferase"
    converter = Multifasta2SNPFile(taxon, batchName, gene)
    refBioseq.sequence = "A--ACCGAA"
    refBioseq.header = "reference"

    lLines = [Bioseq( "line1", "A--ACCGAA" ),
              Bioseq( "line2", "AG-ACCGAA" ),
              Bioseq( "line3", "ATTACCGCA" )]
    alignedBioseqDB.setData( lLines )

    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, converter._logFile, self._inFileName)
    converter._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"}
    converter.detectSNPsAndIndels(converter._wrapper)

    def expSubSNP(nameSuffix, position, lineNumber, alleleNumber, flank5, flank3, polymType, length):
        # Build one expected sub-SNP record from the fields that vary between records.
        return {'subSNPName': batchName + nameSuffix, 'position': position, 'lineName': lineNumber,
                'allele': alleleNumber, '5flank': flank5, '3flank': flank3, 'batchNumber': 1,
                'confidenceValue' : "A", 'type' : polymType, 'length': length}

    dExpAllele = {'C': 1, 'G-': 2, 'TT': 3, '--': 4, 'A' : 5}
    lExpSNP = [expSubSNP("_SNP_6_line3", 6, 3, 1, "ATTACCG", "A", "SNP", 1),
               expSubSNP("_SNP_6_line1", 6, 1, 5, "A--ACCG", "A", "SNP", 1),
               expSubSNP("_SNP_6_line2", 6, 2, 5, "AG-ACCG", "A", "SNP", 1),
               expSubSNP("_INS_1_line2", 1, 2, 2, "A", "ACCGAA", "INSERTION", 2),
               expSubSNP("_INS_1_line3", 1, 3, 3, "A", "ACCGCA", "INSERTION", 2),
               expSubSNP("_INS_1_line1", 1, 1, 4, "A", "ACCGAA", "INSERTION", 2)]
    lExpIndividual = [{'individualNumber': number, 'individualName': name, 'scientificName': "Arabidopsis thaliana"}
                      for number, name in [(1, "line1"), (2, "line2"), (3, "line3")]]

    self.assertEquals(dExpAllele, converter._dAlleleFileResults)
    self.assertEquals(converter._sortSubSNPResultByBatchPositionAndLineName(lExpSNP), converter._lSubSNPFileResults)
    self.assertEquals(lExpIndividual, converter._lIndividualFileResults)
557
def test_detectSNPAndIndels_with_snps_inserts_and_dels(self):
    """Run detectSNPsAndIndels on three lines aligned against "A--ACCGAATATAC".

    Each line is expected to yield one SNP, one insertion and one deletion
    record; the allele table and the individual records are checked too.
    """
    batchName = "batch1"
    converter = Multifasta2SNPFile("Arabidopsis thaliana", batchName, "methyltransferase")

    reference = Bioseq()
    reference.sequence = "A--ACCGAATATAC"
    reference.header = "reference"

    linesDB = BioseqDB()
    linesDB.setData([Bioseq("line1", "A--ACCGAATATAC"),
                     Bioseq("line2", "AG-ACCGAAT--AC"),
                     Bioseq("line3", "ATTACCGCA-----")])

    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, linesDB, converter._logFile, self._inFileName)
    converter._dBatchResults = {'BatchNumber': 1,
                                'BatchName': "Batch1",
                                'GeneName': "methyltransferase",
                                'RefSeqName': "Sequence_de_Reference"}
    converter.detectSNPsAndIndels(converter._wrapper)

    expectedAlleles = {'C': 1, 'G-': 2, 'T--AC': 3, 'TT': 4, '-----': 5, '--': 6, 'TATAC': 7, 'A': 8}

    # Short tag used inside the sub-SNP name for each polymorphism type.
    nameTag = {"SNP": "SNP", "INSERTION": "INS", "DELETION": "DEL"}
    # (type, position, line number, allele number, 5' flank, 3' flank, length)
    rows = [("SNP", 6, 3, 1, "ATTACCG", "A-----", 1),
            ("SNP", 6, 1, 8, "A--ACCG", "ATATAC", 1),
            ("SNP", 6, 2, 8, "AG-ACCG", "AT--AC", 1),

            ("INSERTION", 1, 2, 2, "A", "ACCGAAT--AC", 2),
            ("INSERTION", 1, 3, 4, "A", "ACCGCA-----", 2),
            ("INSERTION", 1, 1, 6, "A", "ACCGAATATAC", 2),

            ("DELETION", 8, 2, 3, "AG-ACCGAA", "", 5),
            ("DELETION", 8, 3, 5, "ATTACCGCA", "", 5),
            ("DELETION", 8, 1, 7, "A--ACCGAA", "", 5)]
    expectedSubSNPs = [{'subSNPName': batchName + "_%s_%d_line%d" % (nameTag[kind], position, line),
                        'position': position,
                        'lineName': line,
                        'allele': allele,
                        '5flank': flank5,
                        '3flank': flank3,
                        'batchNumber': 1,
                        'confidenceValue': "A",
                        'type': kind,
                        'length': length}
                       for (kind, position, line, allele, flank5, flank3, length) in rows]

    expectedIndividuals = [{'individualNumber': number,
                            'individualName': "line%d" % number,
                            'scientificName': "Arabidopsis thaliana"}
                           for number in (1, 2, 3)]

    self.assertEqual(expectedAlleles, converter._dAlleleFileResults)
    self.assertEqual(converter._sortSubSNPResultByBatchPositionAndLineName(expectedSubSNPs), converter._lSubSNPFileResults)
    self.assertEqual(expectedIndividuals, converter._lIndividualFileResults)
597
def test_createWrapperFromFile_with_upcase_and_lowcase_nucleotide(self):
    """Compare the wrapper returned by createWrapperFromFile with a hand-built one.

    The input file is written by _writeInputFileWithUpcaseAndLowcaseNucleotide;
    the reference Bioseq and the lines BioseqDB of both wrappers must be equal.
    """
    self._writeInputFileWithUpcaseAndLowcaseNucleotide()
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")

    expectedLines = BioseqDB()
    expectedReference = Bioseq(
        "Sequence_de_Reference",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA")
    lineSequences = (
        ("Line1", "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA"),
        ("Line2", "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATACGCAGTAGCCAAACCTCCACAATA"),
    )
    for header, sequence in lineSequences:
        expectedLines.add(Bioseq(header, sequence))

    expectedWrapper = ReferenceBioseqAndLinesBioseqDBWrapper(expectedReference, expectedLines, converter._logFile, self._inFileName)

    observedWrapper = converter.createWrapperFromFile(self._inFileName)

    self.assertEqual(observedWrapper._iReferenceBioseq, expectedWrapper._iReferenceBioseq)
    self.assertEqual(observedWrapper._iLinesBioseqDB, expectedWrapper._iLinesBioseqDB)
619
def test_checkHeaderAlphabet(self):
    """checkHeaderAlphabet must accept lower-case and upper-case letters, digits, '_', ':' and '-'."""
    # header ALPHABET [^a-zA-Z0-9_-:]
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    acceptedHeaders = ("abcdefghijklmnopqrstuvwxyz0912834567_:-",
                       "ABCDEFGHIJKLMNOPQRSTUVWXYZ0912834567_:-")
    for candidate in acceptedHeaders:
        self.assertTrue(converter.checkHeaderAlphabet(candidate))
630
def test_checkHeaderAlphabet_empty_string(self):
    """checkHeaderAlphabet must give a false result for the empty string."""
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    emptyHeader = ""
    self.assertFalse(converter.checkHeaderAlphabet(emptyHeader))
638
def test_checkHeaderAlphabet_space(self):
    """checkHeaderAlphabet must give a false result for a single blank."""
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    blankHeader = " "
    self.assertFalse(converter.checkHeaderAlphabet(blankHeader))
646
def test_checkHeaderAlphabet_non_aphabetical(self):
    """checkHeaderAlphabet must give a false result for the character "}"."""
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    braceHeader = "}"
    self.assertFalse(converter.checkHeaderAlphabet(braceHeader))
654
def test_isDNA_bases(self):
    """isDNA_bases must give a true result for a string made only of A, C, G and T."""
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    dnaOnly = "TGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCGACTAATCAACAATATAATGCGAGTAGAGCTTGA"
    self.assertTrue(converter.isDNA_bases(dnaOnly))
662
def test_isDNA_bases_non_DNA_letter(self):
    """isDNA_bases must give a false result when the string starts with an "X"."""
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    withForeignLetter = "XTAGTTGATCA"
    self.assertFalse(converter.isDNA_bases(withForeignLetter))
670
def test_isDNA_bases_carriage_return(self):
    """isDNA_bases must give a false result when the string embeds a newline character."""
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    withNewline = "TA\nGTTGATCA"
    self.assertFalse(converter.isDNA_bases(withNewline))
678
def test_isDNA_bases_empty_string(self):
    """isDNA_bases must give a false result for the empty string."""
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    emptySequence = ""
    self.assertFalse(converter.isDNA_bases(emptySequence))
686
def test_isDNA_bases_space(self):
    """isDNA_bases must give a false result for a single blank."""
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    blankSequence = " "
    self.assertFalse(converter.isDNA_bases(blankSequence))
694
def test_isDNA_bases_IUPAC_letter_but_non_DNA_bases(self):
    """isDNA_bases must give a false result for the letters "UMWSB"."""
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    iupacButNotDna = "UMWSB"
    self.assertFalse(converter.isDNA_bases(iupacButNotDna))
702
def test_getLineAsAHeader(self):
    """getLineAsAHeader must turn ">test on good header" into "test_on_good_header"."""
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    rawLine = ">test on good header"
    expectedHeader = "test_on_good_header"
    observedHeader = converter.getLineAsAHeader(rawLine)
    self.assertEqual(observedHeader, expectedHeader)
712
def test_getLineAsAHeader_warning_bad_header_tag_omitted(self):
    """Feed getLineAsAHeader a line that lacks the leading '>' tag.

    NOTE(review): the only assertion sits in the except branch, so this test
    also passes when getLineAsAHeader raises nothing. Confirm whether an
    exception is really expected for a missing tag and tighten the test.
    """
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    untaggedLine = "test on bad header with tag omitted"
    try:
        converter.getLineAsAHeader(untaggedLine)
    except Exception as e:
        self.assertRaises(Exception, e, self._inFileName, self._obsSubSNPFileName)
724
def test_getLineAsAHeader_warning_repeated_blanks_removed(self):
    """getLineAsAHeader must normalise a header containing a blank followed by a tab.

    The line ">test on header \\twith warning" is expected to come back as
    "test_on_header_with_warning".

    The former trailing assertRaises was dropped: it received the *result*
    of getLineAsAHeader (a string) as its callable, so it only verified that
    calling a string raises TypeError and said nothing about the code under
    test.
    """
    lineToBeCheck = ">test on header \twith warning"
    expHeader = "test_on_header_with_warning"
    multifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    obsHeader = multifasta2SNPFile.getLineAsAHeader(lineToBeCheck)
    self.assertEqual(obsHeader, expHeader)
736
def test_getLineAsAHeader_fatal_error_bad_header(self):
    """getLineAsAHeader must raise for a header that contains a backslash.

    assertRaises is given the bound method and its argument, so the test
    fails when no exception is raised (the previous try/except form passed
    silently in that case).
    """
    # "\\o" spells the same two characters as the original "\o" without
    # relying on an unrecognised escape sequence.
    lineToBeCheck = ">test\\on bad header with fatal error"

    multifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    self.assertRaises(Exception, multifasta2SNPFile.getLineAsAHeader, lineToBeCheck)
748
def test_isHeaderInRefSeqList(self):
    """Call isHeaderInRefSeqList with a header that is present in _lRefSequences.

    NOTE(review): the returned value is never checked and the only assertion
    sits in the except branch, so this test passes whether or not an
    exception is raised. Confirm the intended outcome (raise, or a true
    result) and assert it explicitly.
    """
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    converter._lRefSequences = [Bioseq("line1", "A--ACCGAATATAC"),
                                Bioseq("line2", "AG-ACCGAAT--AC"),
                                Bioseq("line3", "ATTACCGCA-----")]
    searchedHeader = "line1"
    try:
        converter.isHeaderInRefSeqList(searchedHeader)
    except Exception as e:
        self.assertRaises(Exception, e)
765
def test_completeAlleleSetWithCurrentAllele_one_allele_added(self):
    """Adding "C" to an allele set holding A, T and G must return the set with "C" numbered 4."""
    startingAlleles = {"A": 1, "T": 2, "G": 3}
    expectedAlleles = {"A": 1, "T": 2, "G": 3, "C": 4}
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    observedAlleles = converter._completeAlleleSetWithCurrentAllele(startingAlleles, "C")
    self.assertEqual(observedAlleles, expectedAlleles)
781
def test_completeAlleleSetWithCurrentAllele_no_allele_added(self):
    """Adding "T" to an allele set that already holds it must return an unchanged set."""
    startingAlleles = {"A": 1, "T": 2, "G": 3}
    expectedAlleles = {"A": 1, "T": 2, "G": 3}
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    observedAlleles = converter._completeAlleleSetWithCurrentAllele(startingAlleles, "T")
    self.assertEqual(observedAlleles, expectedAlleles)
796
def test_completeAlleleSetWithCurrentAllele_with_an_empty_allele_set(self):
    """Adding "T" to an empty allele set must return a set where "T" is numbered 1."""
    startingAlleles = {}
    expectedAlleles = {"T": 1}
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    observedAlleles = converter._completeAlleleSetWithCurrentAllele(startingAlleles, "T")
    self.assertEqual(observedAlleles, expectedAlleles)
807
def test_completeBatchLineListWithCurrentIndividual(self):
    """Adding "Individual3" to a batch-line list holding individuals 1 and 2 must append entry "3"."""
    #TODO: this test only pass with a batchNumber of 1
    batchNumber = 1
    batchLines = [{'IndividualNumber': str(number), 'BatchNumber': batchNumber}
                  for number in (1, 2)]
    individuals = [{'individualNumber': number,
                    'individualName': "Individual%d" % number,
                    'scientificName': "Arabidopsis thaliana"}
                   for number in (1, 2, 3)]
    expectedBatchLines = [{'IndividualNumber': str(number), 'BatchNumber': batchNumber}
                          for number in (1, 2, 3)]

    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    batchLines = converter._completeBatchLineListWithCurrentIndividual(batchLines, individuals, "Individual3")
    self.assertEqual(batchLines, expectedBatchLines)
826
def test_completeBatchLineListWithCurrentIndividual_no_entries_in_batchline_results_in_input(self):
    """Adding "Individual2" to an empty batch-line list must return a single entry numbered "2"."""
    batchLines = []
    individuals = [{'individualNumber': number,
                    'individualName': "Individual%d" % number,
                    'scientificName': "Arabidopsis thaliana"}
                   for number in (1, 2, 3)]
    expectedBatchLines = [{'IndividualNumber': "2", 'BatchNumber': 1}]

    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    batchLines = converter._completeBatchLineListWithCurrentIndividual(batchLines, individuals, "Individual2")
    self.assertEqual(batchLines, expectedBatchLines)
840
def test_completeBatchLineListWithCurrentIndividual_no_individual_in_individualList(self):
    """_completeBatchLineListWithCurrentIndividual must raise when the individual list is empty.

    assertRaises receives the method and its arguments, so the test fails
    when no exception is raised (the previous try/except form passed
    silently in that case).
    """
    lBatchLineResults = [{'IndividualNumber': "1", 'BatchNumber': 1},
                         {'IndividualNumber': "2", 'BatchNumber': 1}]
    lIndividualResults = []
    lineName2Add = "Individual3"

    multifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    self.assertRaises(Exception,
                      multifasta2SNPFile._completeBatchLineListWithCurrentIndividual,
                      lBatchLineResults, lIndividualResults, lineName2Add)
855
def test_completeBatchLineListWithCurrentIndividual_individual_added_has_no_individual_number(self):
    """_completeBatchLineListWithCurrentIndividual must raise when the matching individual has no 'individualNumber'.

    assertRaises receives the method and its arguments, so the test fails
    when no exception is raised (the previous try/except form passed
    silently in that case).
    """
    lBatchLineResults = [{'IndividualNumber': "1", 'BatchNumber': "1"},
                         {'IndividualNumber': "2", 'BatchNumber': "1"}]
    lIndividualResults = [{'individualNumber': 1, 'individualName': "Individual1", 'scientificName': "Arabidopsis thaliana"},
                          {'individualNumber': 2, 'individualName': "Individual2", 'scientificName': "Arabidopsis thaliana"},
                          # The record to add deliberately lacks 'individualNumber'.
                          {'individualName': "Individual3", 'scientificName': "Arabidopsis thaliana"}]
    lineName2Add = "Individual3"

    multifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    self.assertRaises(Exception,
                      multifasta2SNPFile._completeBatchLineListWithCurrentIndividual,
                      lBatchLineResults, lIndividualResults, lineName2Add)
872
def test_completeBatchLineListWithCurrentIndividual_individual_not_present_in_individualList(self):
    """_completeBatchLineListWithCurrentIndividual must raise for a line name absent from the individual list.

    assertRaises receives the method and its arguments, so the test fails
    when no exception is raised (the previous try/except form passed
    silently in that case).
    """
    lBatchLineResults = [{'IndividualNumber': "1", 'BatchNumber': "1"},
                         {'IndividualNumber': "2", 'BatchNumber': "1"}]
    lIndividualResults = [{'individualNumber': 1, 'individualName': "Individual1", 'scientificName': "Arabidopsis thaliana"},
                          {'individualNumber': 2, 'individualName': "Individual2", 'scientificName': "Arabidopsis thaliana"},
                          {'individualNumber': 3, 'individualName': "Individual3", 'scientificName': "Arabidopsis thaliana"}]
    lineName2Add = "Michael Corleone"

    multifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    self.assertRaises(Exception,
                      multifasta2SNPFile._completeBatchLineListWithCurrentIndividual,
                      lBatchLineResults, lIndividualResults, lineName2Add)
889
def test_findASubSNPInAListWithHisName(self):
    """findASubSNPInAListWithHisName must return the record named "SubSNP_batch1_2_line1" and its index, 1."""
    candidates = [
        {'subSNPName': "SubSNP_batch1_1_line2", 'position': 1, 'lineName': 2, 'allele': 2,
         'batchNumber': 1, 'confidenceValue': "A", 'type': "DELETION"},
        {'subSNPName': "SubSNP_batch1_2_line1", 'position': 1, 'lineName': 1, 'allele': 1,
         'batchNumber': 1, 'confidenceValue': "A", 'type': "DELETION"},
        {'subSNPName': "SubSNP_batch1_6_line1", 'position': 6, 'lineName': 1, 'allele': 3,
         'batchNumber': 1, 'confidenceValue': "A", 'type': "SNP"},
    ]
    searchedName = "SubSNP_batch1_2_line1"

    expectedRecord = {'subSNPName': "SubSNP_batch1_2_line1", 'position': 1, 'lineName': 1, 'allele': 1,
                      'batchNumber': 1, 'confidenceValue': "A", 'type': "DELETION"}
    expectedIndex = 1

    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    observedRecord, observedIndex = converter.findASubSNPInAListWithHisName(searchedName, candidates)

    self.assertEqual(expectedIndex, observedIndex)
    self.assertEqual(expectedRecord, observedRecord)
905
def test_findASubSNPInAListWithHisName_SubSNP_not_found(self):
    """findASubSNPInAListWithHisName must raise when no record carries the requested name.

    assertRaises receives the method and its arguments, so the test fails
    when no exception is raised (the previous try/except form passed
    silently in that case).
    """
    lSubSNPList = [{'subSNPName': "SubSNP_batch1_1_line2", 'position': 1, 'lineName': 2, 'allele': 2, 'batchNumber': 1, 'confidenceValue': "A", 'type': "DELETION"},
                   {'subSNPName': "SubSNP_batch1_2_line1", 'position': 1, 'lineName': 1, 'allele': 1, 'batchNumber': 1, 'confidenceValue': "A", 'type': "DELETION"},
                   {'subSNPName': "SubSNP_batch1_6_line1", 'position': 6, 'lineName': 1, 'allele': 3, 'batchNumber': 1, 'confidenceValue': "A", 'type': "SNP"}]
    name = "SubSNP_fake"

    multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    self.assertRaises(Exception, multifasta2SNPFile.findASubSNPInAListWithHisName, name, lSubSNPList)
918
def test_clusteriseIndels(self):
    """clusteriseIndels with indel 1-6 must widen the overlapping indel1 and indel3 to 1-10 and leave indel2 alone."""
    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    indels = [{'name': name, 'start': start, 'end': end}
              for (name, start, end) in (("indel1", 1, 6),
                                         ("indel2", 12, 15),
                                         ("indel3", 5, 10))]
    seedIndel = {'start': 1, 'end': 6}

    clustered = converter.clusteriseIndels(seedIndel, indels)

    expected = [{'name': name, 'start': start, 'end': end}
                for (name, start, end) in (("indel1", 1, 10),
                                           ("indel2", 12, 15),
                                           ("indel3", 1, 10))]
    self.assertEqual(expected, clustered)
932
def test_clusteriseIndels_no_overlap(self):
    """clusteriseIndels must return the bounds unchanged when none of the indels overlap."""
    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    indels = [{'name': name, 'start': start, 'end': end}
              for (name, start, end) in (("indel1", 1, 6),
                                         ("indel2", 12, 15),
                                         ("indel3", 25, 30))]
    seedIndel = {'start': 1, 'end': 6}

    clustered = converter.clusteriseIndels(seedIndel, indels)

    expected = [{'name': name, 'start': start, 'end': end}
                for (name, start, end) in (("indel1", 1, 6),
                                           ("indel2", 12, 15),
                                           ("indel3", 25, 30))]
    self.assertEqual(expected, clustered)
946
def test_clusteriseIndels_many_overlaps_complicated(self):
    """clusteriseIndels with indel 5-10 is expected to give all four indels the bounds 1-40."""
    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    indels = [{'name': name, 'start': start, 'end': end}
              for (name, start, end) in (("indel1", 1, 6),
                                         ("indel2", 12, 15),
                                         ("indel3", 5, 10),
                                         ("indel4", 9, 40))]
    seedIndel = {'start': 5, 'end': 10}

    clustered = converter.clusteriseIndels(seedIndel, indels)

    expected = [{'name': name, 'start': 1, 'end': 40}
                for name in ("indel1", "indel2", "indel3", "indel4")]
    self.assertEqual(expected, clustered)
962
def test_updateBoundsForAnIndelInAnIndelList(self):
    """updateBoundsForAnIndelInAnIndelList must give indel2 the bounds 7-19 and keep the other indels as they are."""
    indels = [{'name': name, 'start': start, 'end': end}
              for (name, start, end) in (("indel1", 1, 6),
                                         ("indel2", 12, 15),
                                         ("indel3", 5, 10),
                                         ("indel4", 9, 40))]
    rebounded = {'name': "indel2", 'start': 7, 'end': 19}

    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    observed = converter.updateBoundsForAnIndelInAnIndelList(indels, rebounded)

    expected = [{'name': name, 'start': start, 'end': end}
                for (name, start, end) in (("indel1", 1, 6),
                                           ("indel2", 7, 19),
                                           ("indel3", 5, 10),
                                           ("indel4", 9, 40))]
    self.assertEqual(expected, observed)
976
def test_updateBoundsForAnIndelInAnIndelList_no_update_to_do(self):
    """updateBoundsForAnIndelInAnIndelList must return an identical list when indel2 keeps its bounds 12-15."""
    indels = [{'name': name, 'start': start, 'end': end}
              for (name, start, end) in (("indel1", 1, 6),
                                         ("indel2", 12, 15),
                                         ("indel3", 5, 10),
                                         ("indel4", 9, 40))]
    sameBounds = {'name': "indel2", 'start': 12, 'end': 15}

    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    observed = converter.updateBoundsForAnIndelInAnIndelList(indels, sameBounds)

    expected = [{'name': name, 'start': start, 'end': end}
                for (name, start, end) in (("indel1", 1, 6),
                                           ("indel2", 12, 15),
                                           ("indel3", 5, 10),
                                           ("indel4", 9, 40))]
    self.assertEqual(expected, observed)
990
def test_updateBoundsForAnIndelInAnIndelList_indel_2_update_does_not_exist(self):
    """updateBoundsForAnIndelInAnIndelList must raise when the indel to update is not in the list.

    assertRaises receives the method and its arguments, so the test fails
    when no exception is raised (the previous try/except form passed
    silently in that case).
    """
    lIndelsList = [{'name': "indel1", 'start': 1, 'end': 6},
                   {'name': "indel2", 'start': 12, 'end': 15},
                   {'name': "indel3", 'start': 5, 'end': 10},
                   {'name': "indel4", 'start': 9, 'end': 40}]
    # No indel in the list is called "DeNiro".
    dIndelWithNewBounds = {'name': "DeNiro", 'start': 12, 'end': 15}

    multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    self.assertRaises(Exception,
                      multifasta2SNPFile.updateBoundsForAnIndelInAnIndelList,
                      lIndelsList, dIndelWithNewBounds)
1002
def test_mergeBoundsFor2Indels(self):
    """mergeBoundsForTwoOverlappingIndels must give both indels 1-4 and 2-15 the bounds 1-15."""
    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    firstIndel, secondIndel = converter.mergeBoundsForTwoOverlappingIndels(
        {'start': 1, 'end': 4},
        {'start': 2, 'end': 15})
    mergedBounds = {'start': 1, 'end': 15}
    self.assertEqual(mergedBounds, firstIndel)
    self.assertEqual({'start': 1, 'end': 15}, secondIndel)
1012
def test_mergeBoundsFor2Indels_no_overlap(self):
    """mergeBoundsForTwoOverlappingIndels must leave the indels 1-4 and 5-15 untouched."""
    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    firstIndel, secondIndel = converter.mergeBoundsForTwoOverlappingIndels(
        {'start': 1, 'end': 4},
        {'start': 5, 'end': 15})
    self.assertEqual({'start': 1, 'end': 4}, firstIndel)
    self.assertEqual({'start': 5, 'end': 15}, secondIndel)
1022
def test_getUngappedPositionInRefSeq(self):
    """With the gapped reference "A--TTACC-GAA", positions 1, 5 and 10 must map to 1, 3 and 7."""
    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")

    reference = Bioseq()
    reference.sequence = "A--TTACC-GAA"
    reference.header = "reference"

    linesDB = BioseqDB()
    linesDB.setData([Bioseq("line1", "AACTTTCCAGAA"),
                     Bioseq("line2", "AACTTACC-GAA")])

    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, linesDB, converter._logFile, self._inFileName)

    queriedPositions = (1, 5, 10)
    expectedPositions = (1, 3, 7)
    # Query every position first, then compare, as the assertions come last.
    observedPositions = [converter.getUngappedPositionInRefSeq(position)
                         for position in queriedPositions]

    for expected, observed in zip(expectedPositions, observedPositions):
        self.assertEqual(expected, observed)
1046
def test_getUngappedPositionInRefSeq_no_gap(self):
    """With the gap-free reference "AACTTACCAGAA", positions 1, 5 and 10 must map to themselves."""
    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")

    reference = Bioseq()
    reference.sequence = "AACTTACCAGAA"
    reference.header = "reference"

    linesDB = BioseqDB()
    linesDB.setData([Bioseq("line1", "AACTTTCCAGAA"),
                     Bioseq("line2", "AACTTACC-GAA")])

    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, linesDB, converter._logFile, self._inFileName)

    queriedPositions = (1, 5, 10)
    expectedPositions = (1, 5, 10)
    # Query every position first, then compare, as the assertions come last.
    observedPositions = [converter.getUngappedPositionInRefSeq(position)
                         for position in queriedPositions]

    for expected, observed in zip(expectedPositions, observedPositions):
        self.assertEqual(expected, observed)
1070
def test_checkAllSeq_sequences_with_different_sizes_one_seq_longer(self):
    """Building the wrapper must raise when one line is longer than the reference.

    The exception message is compared with the expected text, which names
    the input file, the offending sequence header and both lengths. The
    else branch makes the test fail when no exception is raised (the
    previous form skipped every assertion in that case).
    """
    multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    refBioseq = Bioseq()
    alignedBioseqDB = BioseqDB()
    refBioseq.sequence = "AACTTACCAGAA"
    refBioseq.header = "reference"
    bs1 = Bioseq("line1", "AACTTTCCAGAA")
    bs2 = Bioseq("line2", "AACTTACC-GAATTTC")

    alignedBioseqDB.setData([bs1, bs2])

    try:
        multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName)
    except Exception as e:
        obsMsg = e.message
    else:
        self.fail("ReferenceBioseqAndLinesBioseqDBWrapper did not raise for a sequence longer than the reference")

    expMsg = "File: " + self._inFileName + ", problem with the sequence " + bs2.header + ": its length is different from the reference seq! All the sequences must have the same length.\n"
    expMsg += "refseq length: " + str(len(refBioseq.sequence)) + "\n"
    expMsg += "seq length: " + str(len(bs2.sequence)) + "\n"
    self.assertEqual(expMsg, obsMsg)
1091
def test_checkAllSeq_sequences_with_different_sizes_one_seq_shorter(self):
    """Building the wrapper must raise when one line is shorter than the reference.

    The exception message is compared with the expected text, which names
    the input file, the offending sequence header and both lengths. The
    else branch makes the test fail when no exception is raised (the
    previous form skipped every assertion in that case).
    """
    multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    refBioseq = Bioseq()
    alignedBioseqDB = BioseqDB()
    refBioseq.sequence = "AACTTACCAGAA"
    refBioseq.header = "reference"
    bs1 = Bioseq("line1", "AACTTTCCAGAA")
    bs2 = Bioseq("line2", "AACTTACC")

    alignedBioseqDB.setData([bs1, bs2])

    try:
        multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName)
    except Exception as e:
        obsMsg = e.message
    else:
        self.fail("ReferenceBioseqAndLinesBioseqDBWrapper did not raise for a sequence shorter than the reference")

    expMsg = "File: " + self._inFileName + ", problem with the sequence " + bs2.header + ": its length is different from the reference seq! All the sequences must have the same length.\n"
    expMsg += "refseq length: " + str(len(refBioseq.sequence)) + "\n"
    expMsg += "seq length: " + str(len(bs2.sequence)) + "\n"
    self.assertEqual(expMsg, obsMsg)
1112
1113
def test_getFlanksOfASubSNP(self):
    """getFlanksOfASubSNP("line1", 3, 3, 7) must return the flanks "AA" and "TCCAGAA"."""
    reference = Bioseq()
    reference.sequence = "AACTTACCAGAA"
    reference.header = "reference"

    linesDB = BioseqDB()
    linesDB.setData([Bioseq("line1", "AACTTTCCAGAA"),
                     Bioseq("line2", "AACTTACC-GAA")])

    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, linesDB, converter._logFile, self._inFileName)

    position, polymorphismLength = 3, 3
    observed5, observed3 = converter.getFlanksOfASubSNP("line1", position, polymorphismLength, 7)

    self.assertEqual("AA", observed5)
    self.assertEqual("TCCAGAA", observed3)
1133
def test_getFlanksOfASubSNP_flank_truncated(self):
    """getFlanksOfASubSNP("line1", 3, 3, 500) must still return "AA" and "TCCAGAA" on a 12-base line."""
    reference = Bioseq()
    reference.sequence = "AACTTACCAGAA"
    reference.header = "reference"

    linesDB = BioseqDB()
    linesDB.setData([Bioseq("line1", "AACTTTCCAGAA"),
                     Bioseq("line2", "AACTTACC-GAA")])

    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, linesDB, converter._logFile, self._inFileName)

    position, polymorphismLength = 3, 3
    observed5, observed3 = converter.getFlanksOfASubSNP("line1", position, polymorphismLength, 500)

    self.assertEqual("AA", observed5)
    self.assertEqual("TCCAGAA", observed3)
1153
def test_getFlanksOfASubSNP_empty_seq(self):
    """getFlanksOfASubSNP must return two empty flanks when every sequence is empty."""
    reference = Bioseq()
    reference.sequence = ""
    reference.header = "reference"

    linesDB = BioseqDB()
    linesDB.setData([Bioseq("line1", ""),
                     Bioseq("line2", "")])

    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, linesDB, converter._logFile, self._inFileName)

    position, polymorphismLength = 3, 3
    observed5, observed3 = converter.getFlanksOfASubSNP("line1", position, polymorphismLength, 500)

    self.assertEqual("", observed5)
    self.assertEqual("", observed3)
1173
def test_getFlanksOfASubSNP_flank_of_first_base(self):
    """Check the flanks of a 1-base polymorphism at position 1 of "line1".

    The expected 5' flank is empty and the expected 3' flank is
    "ACTTTCCAGAA".
    """
    refBioseq = Bioseq()
    refBioseq.header = "reference"
    refBioseq.sequence = "AACTTACCAGAA"
    alignedBioseqDB = BioseqDB()
    alignedBioseqDB.setData([Bioseq("line1", "AACTTTCCAGAA"),
                             Bioseq("line2", "AACTTACC-GAA")])
    multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(
        refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName)

    # Arguments: line name, sub-SNP position, polymorphism length, 500.
    obs5flank, obs3flank = multifasta2SNPFile.getFlanksOfASubSNP("line1", 1, 1, 500)

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual("", obs5flank)
    self.assertEqual("ACTTTCCAGAA", obs3flank)
1193
def test_getFlanksOfASubSNP_flank_of_first_base_with_polym_on_all_sequence(self):
    """Check that a 12-base polymorphism at position 1 leaves both flanks empty.

    The "line1" sequence is 12 bases long, so the polymorphism spans it
    entirely.
    """
    refBioseq = Bioseq()
    refBioseq.header = "reference"
    refBioseq.sequence = "AACTTACCAGAA"
    alignedBioseqDB = BioseqDB()
    alignedBioseqDB.setData([Bioseq("line1", "AACTTTCCAGAA"),
                             Bioseq("line2", "AACTTACC-GAA")])
    multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(
        refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName)

    # Arguments: line name, sub-SNP position, polymorphism length, 500.
    obs5flank, obs3flank = multifasta2SNPFile.getFlanksOfASubSNP("line1", 1, 12, 500)

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual("", obs5flank)
    self.assertEqual("", obs3flank)
1212
def test_getFlanksOfASubSNP_flank_of_last_base_with_polym_on_all_sequence(self):
    """Check the flanks of a 1-base polymorphism at the last position (12).

    The expected 5' flank is "AACTTTCCAGA" and the expected 3' flank is
    empty.
    """
    refBioseq = Bioseq()
    refBioseq.header = "reference"
    refBioseq.sequence = "AACTTACCAGAA"
    alignedBioseqDB = BioseqDB()
    alignedBioseqDB.setData([Bioseq("line1", "AACTTTCCAGAA"),
                             Bioseq("line2", "AACTTACC-GAA")])
    multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(
        refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName)

    # Arguments: line name, sub-SNP position, polymorphism length, 500.
    obs5flank, obs3flank = multifasta2SNPFile.getFlanksOfASubSNP("line1", 12, 1, 500)

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual("AACTTTCCAGA", obs5flank)
    self.assertEqual("", obs3flank)
1231 #
def test_subSNPExistsInSubSNPList_subSNP_exists(self):
    """Check that subSNPExistsInSubSNPList returns True for a listed sub-SNP.

    The searched dictionary is equal to the second entry of the list.
    """
    batchName = "batch1"

    def buildSubSNP(suffix, position, lineName, allele, flank5, flank3, polymType, length):
        # Every sub-SNP of this fixture shares batch number 1 and confidence "A".
        return {'subSNPName': batchName + suffix, 'position': position, 'lineName': lineName,
                'allele': allele, '5flank': flank5, '3flank': flank3, 'batchNumber': 1,
                'confidenceValue': "A", 'type': polymType, 'length': length}

    lSubSNP = [
        buildSubSNP("_DEL_1_line2", 1, 2, 3, "", "CCGAA", "DELETION", 4),
        buildSubSNP("_DEL_1_line1", 1, 1, 2, "", "CCGAA", "DELETION", 4),
        buildSubSNP("_SNP_8_line3", 8, 3, 1, "ATTACCG", "A", "SNP", 1),
        buildSubSNP("_SNP_8_line1", 8, 1, 6, "A--ACCG", "A", "SNP", 1),
        buildSubSNP("_SNP_8_line2", 8, 2, 6, "---ACCG", "A", "SNP", 1),
        buildSubSNP("_SNP_8_line4", 8, 4, 6, "----CCG", "A", "SNP", 1),
        buildSubSNP("_DEL_1_line4", 1, 4, 4, "", "CCGAA", "DELETION", 4),
        buildSubSNP("_DEL_1_line3", 1, 3, 5, "", "CCGGA", "DELETION", 4),
    ]
    multifasta2SNPFile = Multifasta2SNPFile(batchName, "gene1", "mouse")

    dSearchedSubSNP = buildSubSNP("_DEL_1_line1", 1, 1, 2, "", "CCGAA", "DELETION", 4)

    # "exp" is the literal expectation, "obs" the value returned by the code under test.
    expResult = True
    obsResult = multifasta2SNPFile.subSNPExistsInSubSNPList(dSearchedSubSNP, lSubSNP)

    self.assertEqual(expResult, obsResult)
1250
def test_subSNPExistsInSubSNPList_subSNP_does_not_exist(self):
    """Check that subSNPExistsInSubSNPList returns False for an unlisted sub-SNP.

    The searched dictionary has name suffix "_DEL_12_line1" and position 12,
    which no entry of the list has.
    """
    batchName = "batch1"

    def buildSubSNP(suffix, position, lineName, allele, flank5, flank3, polymType, length):
        # Every sub-SNP of this fixture shares batch number 1 and confidence "A".
        return {'subSNPName': batchName + suffix, 'position': position, 'lineName': lineName,
                'allele': allele, '5flank': flank5, '3flank': flank3, 'batchNumber': 1,
                'confidenceValue': "A", 'type': polymType, 'length': length}

    lSubSNP = [
        buildSubSNP("_DEL_1_line2", 1, 2, 3, "", "CCGAA", "DELETION", 4),
        buildSubSNP("_DEL_1_line1", 1, 1, 2, "", "CCGAA", "DELETION", 4),
        buildSubSNP("_SNP_8_line3", 8, 3, 1, "ATTACCG", "A", "SNP", 1),
        buildSubSNP("_SNP_8_line1", 8, 1, 6, "A--ACCG", "A", "SNP", 1),
        buildSubSNP("_SNP_8_line2", 8, 2, 6, "---ACCG", "A", "SNP", 1),
        buildSubSNP("_SNP_8_line4", 8, 4, 6, "----CCG", "A", "SNP", 1),
        buildSubSNP("_DEL_1_line4", 1, 4, 4, "", "CCGAA", "DELETION", 4),
        buildSubSNP("_DEL_1_line3", 1, 3, 5, "", "CCGGA", "DELETION", 4),
    ]
    multifasta2SNPFile = Multifasta2SNPFile(batchName, "gene1", "mouse")

    dSearchedSubSNP = buildSubSNP("_DEL_12_line1", 12, 1, 2, "", "CCGAA", "DELETION", 4)

    # "exp" is the literal expectation, "obs" the value returned by the code under test.
    expResult = False
    obsResult = multifasta2SNPFile.subSNPExistsInSubSNPList(dSearchedSubSNP, lSubSNP)

    self.assertEqual(expResult, obsResult)
1269
def _writeExpSubSNPFile(self):
    """Write the expected SubSNP CSV (header plus Batch1 SNP rows) to self._expSubSNPFileName."""
    lRows = [
        "SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n",
        "Batch1_SNP_4_Line1;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;1\n",
        "Batch1_SNP_4_Line2;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n",
        "Batch1_SNP_21_Line1;A;SNP;21;CCTTAGCCATTGCTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;2\n",
        "Batch1_SNP_21_Line2;A;SNP;21;CCTAAGCCATTGCTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_25_Line1;A;SNP;25;CCTTAGCCATTGCTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch1_SNP_25_Line2;A;SNP;25;CCTAAGCCATTGCTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_36_Line1;A;SNP;36;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch1_SNP_36_Line2;A;SNP;36;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_51_Line1;A;SNP;51;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;1;1;Sequence;;;2\n",
        "Batch1_SNP_51_Line2;A;SNP;51;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;1;2;Sequence;;;4\n",
    ]
    # The context manager closes the file even if a write fails.
    with open(self._expSubSNPFileName, "w") as expFileHandle:
        expFileHandle.writelines(lRows)
1284
def _writeExpSubSNPFileWithSnpsAndIndels(self):
    """Write the expected SubSNP CSV mixing INSERTION, SNP and DELETION rows to self._expSubSNPFileName."""
    lRows = [
        "SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n",
        "Batch1_INS_1_Line1;A;INSERTION;1;C;TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;2;1;1;Sequence;;;8\n",
        "Batch1_INS_1_Line2;A;INSERTION;1;C;AAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;2;1;2;Sequence;;;6\n",
        "Batch1_SNP_2_Line1;A;SNP;2;C--;AGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;1\n",
        "Batch1_SNP_2_Line2;A;SNP;2;CCT;AGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n",
        "Batch1_DEL_8_Line1;A;DELETION;8;C--TAGCCA;CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;3;1;1;Sequence;;;5\n",
        "Batch1_DEL_8_Line2;A;DELETION;8;CCTAAGCCA;CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;3;1;2;Sequence;;;7\n",
        "Batch1_SNP_19_Line1;A;SNP;19;C--TAGCCA---CTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;2\n",
        "Batch1_SNP_19_Line2;A;SNP;19;CCTAAGCCATT-CTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_23_Line1;A;SNP;23;C--TAGCCA---CTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch1_SNP_23_Line2;A;SNP;23;CCTAAGCCATT-CTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_34_Line1;A;SNP;34;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch1_SNP_34_Line2;A;SNP;34;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_49_Line1;A;SNP;49;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;1;1;Sequence;;;2\n",
        "Batch1_SNP_49_Line2;A;SNP;49;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;1;2;Sequence;;;4\n",
    ]
    # The context manager closes the file even if a write fails.
    with open(self._expSubSNPFileName, "w") as expFileHandle:
        expFileHandle.writelines(lRows)
1303
def _writeExpSubSNPFileSeveralBatches(self):
    """Write the expected SubSNP CSV for two batches (Batch_Gene1, Batch_Gene2) to self._expSubSNPFileName."""
    sHeader = "SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n"
    # Rows of batch number 1: SNPs only.
    lGene1Rows = [
        "Batch_Gene1_SNP_4_Line1;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;1\n",
        "Batch_Gene1_SNP_4_Line2;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n",
        "Batch_Gene1_SNP_21_Line1;A;SNP;21;CCTTAGCCATTGCTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;2\n",
        "Batch_Gene1_SNP_21_Line2;A;SNP;21;CCTAAGCCATTGCTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch_Gene1_SNP_25_Line1;A;SNP;25;CCTTAGCCATTGCTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch_Gene1_SNP_25_Line2;A;SNP;25;CCTAAGCCATTGCTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch_Gene1_SNP_36_Line1;A;SNP;36;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch_Gene1_SNP_36_Line2;A;SNP;36;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch_Gene1_SNP_51_Line1;A;SNP;51;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;1;1;Sequence;;;2\n",
        "Batch_Gene1_SNP_51_Line2;A;SNP;51;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;1;2;Sequence;;;4\n",
    ]
    # Rows of batch number 2: insertions, deletions and SNPs.
    lGene2Rows = [
        "Batch_Gene2_INS_1_Line1;A;INSERTION;1;C;TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;2;2;1;Sequence;;;8\n",
        "Batch_Gene2_INS_1_Line2;A;INSERTION;1;C;AAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;2;2;2;Sequence;;;6\n",
        "Batch_Gene2_SNP_2_Line1;A;SNP;2;C--;AGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;2;1;Sequence;;;1\n",
        "Batch_Gene2_SNP_2_Line2;A;SNP;2;CCT;AGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;2;2;Sequence;;;4\n",
        "Batch_Gene2_DEL_8_Line1;A;DELETION;8;C--TAGCCA;CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;3;2;1;Sequence;;;5\n",
        "Batch_Gene2_DEL_8_Line2;A;DELETION;8;CCTAAGCCA;CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;3;2;2;Sequence;;;7\n",
        "Batch_Gene2_SNP_19_Line1;A;SNP;19;C--TAGCCA---CTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;2;1;Sequence;;;2\n",
        "Batch_Gene2_SNP_19_Line2;A;SNP;19;CCTAAGCCATT-CTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;2;2;Sequence;;;2\n",
        "Batch_Gene2_SNP_23_Line1;A;SNP;23;C--TAGCCA---CTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;2;1;Sequence;;;3\n",
        "Batch_Gene2_SNP_23_Line2;A;SNP;23;CCTAAGCCATT-CTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;2;2;Sequence;;;2\n",
        "Batch_Gene2_SNP_34_Line1;A;SNP;34;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;2;1;Sequence;;;3\n",
        "Batch_Gene2_SNP_34_Line2;A;SNP;34;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;2;2;Sequence;;;2\n",
        "Batch_Gene2_SNP_49_Line1;A;SNP;49;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;2;1;Sequence;;;2\n",
        "Batch_Gene2_SNP_49_Line2;A;SNP;49;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;2;2;Sequence;;;4\n",
    ]
    # The context manager closes the file even if a write fails.
    with open(self._expSubSNPFileName, "w") as expFileHandle:
        expFileHandle.write(sHeader)
        expFileHandle.writelines(lGene1Rows)
        expFileHandle.writelines(lGene2Rows)
1333
def _writeExpSubSNPFileSeveralBatches_different_lines_between_files(self):
    """Write the expected two-batch SubSNP CSV where batch 2 uses Line3/Line4 instead of Line1/Line2.

    The output goes to self._expSubSNPFileName.
    """
    sHeader = "SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n"
    # Rows of batch number 1: individuals 1 and 2 (Line1, Line2).
    lGene1Rows = [
        "Batch_Gene1_SNP_4_Line1;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;1\n",
        "Batch_Gene1_SNP_4_Line2;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n",
        "Batch_Gene1_SNP_21_Line1;A;SNP;21;CCTTAGCCATTGCTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;2\n",
        "Batch_Gene1_SNP_21_Line2;A;SNP;21;CCTAAGCCATTGCTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch_Gene1_SNP_25_Line1;A;SNP;25;CCTTAGCCATTGCTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch_Gene1_SNP_25_Line2;A;SNP;25;CCTAAGCCATTGCTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch_Gene1_SNP_36_Line1;A;SNP;36;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch_Gene1_SNP_36_Line2;A;SNP;36;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch_Gene1_SNP_51_Line1;A;SNP;51;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;1;1;Sequence;;;2\n",
        "Batch_Gene1_SNP_51_Line2;A;SNP;51;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;1;2;Sequence;;;4\n",
    ]
    # Rows of batch number 2: individuals 3 and 4 (Line3, Line4).
    lGene2Rows = [
        "Batch_Gene2_INS_1_Line3;A;INSERTION;1;C;TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;2;2;3;Sequence;;;8\n",
        "Batch_Gene2_INS_1_Line4;A;INSERTION;1;C;AAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;2;2;4;Sequence;;;6\n",
        "Batch_Gene2_SNP_2_Line3;A;SNP;2;C--;AGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;2;3;Sequence;;;1\n",
        "Batch_Gene2_SNP_2_Line4;A;SNP;2;CCT;AGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;2;4;Sequence;;;4\n",
        "Batch_Gene2_DEL_8_Line3;A;DELETION;8;C--TAGCCA;CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;3;2;3;Sequence;;;5\n",
        "Batch_Gene2_DEL_8_Line4;A;DELETION;8;CCTAAGCCA;CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;3;2;4;Sequence;;;7\n",
        "Batch_Gene2_SNP_19_Line3;A;SNP;19;C--TAGCCA---CTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;2;3;Sequence;;;2\n",
        "Batch_Gene2_SNP_19_Line4;A;SNP;19;CCTAAGCCATT-CTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;2;4;Sequence;;;2\n",
        "Batch_Gene2_SNP_23_Line3;A;SNP;23;C--TAGCCA---CTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;2;3;Sequence;;;3\n",
        "Batch_Gene2_SNP_23_Line4;A;SNP;23;CCTAAGCCATT-CTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;2;4;Sequence;;;2\n",
        "Batch_Gene2_SNP_34_Line3;A;SNP;34;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;2;3;Sequence;;;3\n",
        "Batch_Gene2_SNP_34_Line4;A;SNP;34;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;2;4;Sequence;;;2\n",
        "Batch_Gene2_SNP_49_Line3;A;SNP;49;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;2;3;Sequence;;;2\n",
        "Batch_Gene2_SNP_49_Line4;A;SNP;49;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;2;4;Sequence;;;4\n",
    ]
    # The context manager closes the file even if a write fails.
    with open(self._expSubSNPFileName, "w") as expFileHandle:
        expFileHandle.write(sHeader)
        expFileHandle.writelines(lGene1Rows)
        expFileHandle.writelines(lGene2Rows)
1363
def _writeExpSubSNPFileSeveralLineSeq(self):
    """Write the expected SubSNP CSV for the longer (75-base) sequences to self._expSubSNPFileName."""
    lRows = [
        "SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n",
        "Batch1_SNP_4_Line1;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA;1;1;1;Sequence;;;1\n",
        "Batch1_SNP_4_Line2;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATACGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n",
        "Batch1_SNP_21_Line1;A;SNP;21;CCTTAGCCATTGCTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA;1;1;1;Sequence;;;2\n",
        "Batch1_SNP_21_Line2;A;SNP;21;CCTAAGCCATTGCTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATACGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_25_Line1;A;SNP;25;CCTTAGCCATTGCTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA;1;1;1;Sequence;;;3\n",
        "Batch1_SNP_25_Line2;A;SNP;25;CCTAAGCCATTGCTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATACGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_36_Line1;A;SNP;36;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA;1;1;1;Sequence;;;3\n",
        "Batch1_SNP_36_Line2;A;SNP;36;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATACGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_51_Line1;A;SNP;51;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;CGCAGTAGCCAAACCTCCACAATA;1;1;1;Sequence;;;2\n",
        "Batch1_SNP_51_Line2;A;SNP;51;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;CGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n",
    ]
    # The context manager closes the file even if a write fails.
    with open(self._expSubSNPFileName, "w") as expFileHandle:
        expFileHandle.writelines(lRows)
1378
1379
def _writeExpAlleleFile(self):
    """Write the expected allele CSV (header plus alleles T, C, G, A) to self._expAlleleFileName."""
    lRows = [
        "AlleleNumber;Value;Motif;NbCopy;Comment\n",
        "1;T;;;\n",
        "2;C;;;\n",
        "3;G;;;\n",
        "4;A;;;\n",
    ]
    # The context manager closes the file even if a write fails.
    with open(self._expAlleleFileName, "w") as expFileHandle:
        expFileHandle.writelines(lRows)
1388
def _writeExpAlleleFileWithSnpsAndIndels(self):
    """Write the expected allele CSV with base and indel alleles to self._expAlleleFileName."""
    lRows = [
        "AlleleNumber;Value;Motif;NbCopy;Comment\n",
        "1;T;;;\n",
        "2;C;;;\n",
        "3;G;;;\n",
        "4;A;;;\n",
        "5;---;;;\n",
        "6;CT;;;\n",
        "7;TT-;;;\n",
        "8;--;;;\n",
    ]
    # The context manager closes the file even if a write fails.
    with open(self._expAlleleFileName, "w") as expFileHandle:
        expFileHandle.writelines(lRows)
1401
1402
def _writeExpAlleleFileSeveralBatches(self):
    """Write the expected allele CSV of the several-batches scenario to self._expAlleleFileName."""
    lRows = [
        "AlleleNumber;Value;Motif;NbCopy;Comment\n",
        "1;T;;;\n",
        "2;C;;;\n",
        "3;G;;;\n",
        "4;A;;;\n",
        "5;---;;;\n",
        "6;CT;;;\n",
        "7;TT-;;;\n",
        "8;--;;;\n",
    ]
    # The context manager closes the file even if a write fails.
    with open(self._expAlleleFileName, "w") as expFileHandle:
        expFileHandle.writelines(lRows)
1415
def _writeExpIndividualFile(self):
    """Write the expected individual CSV (Line1 and Line2) to self._expIndividualFileName."""
    lRows = [
        "IndividualNumber;IndividualName;Description;AberrAneuploide;FractionLength;DeletionLineSynthesis;UrlEarImage;TypeLine;ChromNumber;ArmChrom;DeletionBin;ScientificName;local_germplasm_name;submitter_code;local_institute;donor_institute;donor_acc_id\n",
        "1;Line1;;;;;;;;;;Arabidopsis thaliana;;;;;\n",
        "2;Line2;;;;;;;;;;Arabidopsis thaliana;;;;;\n",
    ]
    # The context manager closes the file even if a write fails.
    with open(self._expIndividualFileName, "w") as expFileHandle:
        expFileHandle.writelines(lRows)
1422
def _writeExpIndividualFile_different_lines_between_files(self):
    """Write the expected individual CSV (Line1 to Line4) to self._expIndividualFileName."""
    lRows = [
        "IndividualNumber;IndividualName;Description;AberrAneuploide;FractionLength;DeletionLineSynthesis;UrlEarImage;TypeLine;ChromNumber;ArmChrom;DeletionBin;ScientificName;local_germplasm_name;submitter_code;local_institute;donor_institute;donor_acc_id\n",
        "1;Line1;;;;;;;;;;Arabidopsis thaliana;;;;;\n",
        "2;Line2;;;;;;;;;;Arabidopsis thaliana;;;;;\n",
        "3;Line3;;;;;;;;;;Arabidopsis thaliana;;;;;\n",
        "4;Line4;;;;;;;;;;Arabidopsis thaliana;;;;;\n",
    ]
    # The context manager closes the file even if a write fails.
    with open(self._expIndividualFileName, "w") as expFileHandle:
        expFileHandle.writelines(lRows)
1431
def _writeExpSequenceFile(self):
    """Write the expected reference sequence as FASTA and its CSV description.

    The FASTA goes to self._expSequenceFSAFileName and the CSV to
    self._expSequenceCSVFileName.
    """
    # Each file is written inside a context manager so it is explicitly closed.
    with open(self._expSequenceFSAFileName, "w") as SequenceFSAFileHandle:
        SequenceFSAFileHandle.write(">Sequence_de_Reference\n")
        SequenceFSAFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n")
    with open(self._expSequenceCSVFileName, "w") as SequenceCSVFileHandle:
        SequenceCSVFileHandle.write("SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference;Reference;;;;;Arabidopsis thaliana\n")
1439
def _writeExpSequenceFileSeveralLineSeq(self):
    """Write the expected longer reference sequence as FASTA and its CSV description.

    The FASTA goes to self._expSequenceFSAFileName and the CSV to
    self._expSequenceCSVFileName.
    """
    # Each file is written inside a context manager so it is explicitly closed.
    with open(self._expSequenceFSAFileName, "w") as SequenceFSAFileHandle:
        SequenceFSAFileHandle.write(">Sequence_de_Reference\n")
        SequenceFSAFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA\n")
    with open(self._expSequenceCSVFileName, "w") as SequenceCSVFileHandle:
        SequenceCSVFileHandle.write("SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference;Reference;;;;;Arabidopsis thaliana\n")
1447
def _writeExpSequenceFileWithDeletion(self):
    """Write the expected gap-free reference sequence as FASTA and its CSV description.

    The FASTA goes to self._expSequenceFSAFileName and the CSV to
    self._expSequenceCSVFileName.
    """
    # Each file is written inside a context manager so it is explicitly closed.
    with open(self._expSequenceFSAFileName, "w") as SequenceFSAFileHandle:
        SequenceFSAFileHandle.write(">Sequence_de_Reference\n")
        SequenceFSAFileHandle.write("CAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n")
    with open(self._expSequenceCSVFileName, "w") as SequenceCSVFileHandle:
        SequenceCSVFileHandle.write("SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference;Reference;;;;;Arabidopsis thaliana\n")
1455
def _writeExpSequenceSeveralBatches(self):
    """Write two expected reference sequences (Reference1, Reference2) as FASTA plus their CSV description.

    The FASTA goes to self._expSequenceFSAFileName and the CSV to
    self._expSequenceCSVFileName.
    """
    # Each file is written inside a context manager so it is explicitly closed.
    with open(self._expSequenceFSAFileName, "w") as SequenceFSAFileHandle:
        SequenceFSAFileHandle.write(">Sequence_de_Reference1\n")
        SequenceFSAFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n")
        SequenceFSAFileHandle.write(">Sequence_de_Reference2\n")
        SequenceFSAFileHandle.write("CAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n")
    with open(self._expSequenceCSVFileName, "w") as SequenceCSVFileHandle:
        SequenceCSVFileHandle.write("SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference1;Reference;;;;;Arabidopsis thaliana\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference2;Reference;;;;;Arabidopsis thaliana\n")
1466
def _writeExpSequenceSeveralBatchesForSameRefSeq(self):
    """Write two expected sequences that share the name Sequence_de_Reference1, as FASTA plus CSV.

    The FASTA goes to self._expSequenceFSAFileName and the CSV to
    self._expSequenceCSVFileName.
    """
    # Each file is written inside a context manager so it is explicitly closed.
    with open(self._expSequenceFSAFileName, "w") as SequenceFSAFileHandle:
        SequenceFSAFileHandle.write(">Sequence_de_Reference1\n")
        SequenceFSAFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n")
        SequenceFSAFileHandle.write(">Sequence_de_Reference1\n")
        SequenceFSAFileHandle.write("CAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n")
    with open(self._expSequenceCSVFileName, "w") as SequenceCSVFileHandle:
        SequenceCSVFileHandle.write("SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference1;Reference;;;;;Arabidopsis thaliana\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference1;Reference;;;;;Arabidopsis thaliana\n")
1477
def _writeExpBatchFile(self):
    """Write the expected single-batch description record to self._expBatchFileName."""
    lRecord = [
        "BatchNumber: 1\n",
        "BatchName: Batch1\n",
        "GeneName: methyltransferase\n",
        "Description: \n",
        "ContactNumber: 1\n",
        "ProtocolNumber: 1\n",
        "ThematicNumber: 1\n",
        "RefSeqName: Sequence_de_Reference\n",
        "AlignmentFileName: \n",
        "SeqName: \n",
        "//\n",
    ]
    # The context manager closes the file even if a write fails.
    with open(self._expBatchFileName, "w") as BatchFileHandle:
        BatchFileHandle.writelines(lRecord)
1492
def _writeExpBatchFileSeveralBatches(self):
    """Write the expected two-batch description (one reference sequence per batch) to self._expBatchFileName."""
    lFirstRecord = [
        "BatchNumber: 1\n",
        "BatchName: Batch_Gene1\n",
        "GeneName: Gene1\n",
        "Description: \n",
        "ContactNumber: 1\n",
        "ProtocolNumber: 1\n",
        "ThematicNumber: 1\n",
        "RefSeqName: Sequence_de_Reference1\n",
        "AlignmentFileName: \n",
        "SeqName: \n",
        "//\n",
    ]
    lSecondRecord = [
        "BatchNumber: 2\n",
        "BatchName: Batch_Gene2\n",
        "GeneName: Gene2\n",
        "Description: \n",
        "ContactNumber: 1\n",
        "ProtocolNumber: 1\n",
        "ThematicNumber: 1\n",
        "RefSeqName: Sequence_de_Reference2\n",
        "AlignmentFileName: \n",
        "SeqName: \n",
        "//\n",
    ]
    # The context manager closes the file even if a write fails.
    with open(self._expBatchFileName, "w") as BatchFileHandle:
        BatchFileHandle.writelines(lFirstRecord)
        BatchFileHandle.writelines(lSecondRecord)
1518
def _writeExpBatchFileSeveralBatchesForSameRefSeq(self):
    """Write the expected two-batch description where both batches name Sequence_de_Reference1.

    The output goes to self._expBatchFileName.
    """
    lFirstRecord = [
        "BatchNumber: 1\n",
        "BatchName: Batch_Gene1\n",
        "GeneName: Gene1\n",
        "Description: \n",
        "ContactNumber: 1\n",
        "ProtocolNumber: 1\n",
        "ThematicNumber: 1\n",
        "RefSeqName: Sequence_de_Reference1\n",
        "AlignmentFileName: \n",
        "SeqName: \n",
        "//\n",
    ]
    lSecondRecord = [
        "BatchNumber: 2\n",
        "BatchName: Batch_Gene2\n",
        "GeneName: Gene2\n",
        "Description: \n",
        "ContactNumber: 1\n",
        "ProtocolNumber: 1\n",
        "ThematicNumber: 1\n",
        "RefSeqName: Sequence_de_Reference1\n",
        "AlignmentFileName: \n",
        "SeqName: \n",
        "//\n",
    ]
    # The context manager closes the file exactly once, even if a write fails.
    with open(self._expBatchFileName, "w") as BatchFileHandle:
        BatchFileHandle.writelines(lFirstRecord)
        BatchFileHandle.writelines(lSecondRecord)
1547
def _writeExpBatchLineFile(self):
    """Write the expected batch-line CSV (individuals 1 and 2 in batch 1) to self._expBatchLineFileName."""
    lRows = [
        "IndividualNumber;Pos5;Pos3;BatchNumber;Sequence\n",
        "1;;;1;\n",
        "2;;;1;\n",
    ]
    # The context manager closes the file even if a write fails.
    with open(self._expBatchLineFileName, "w") as BatchLineFileHandle:
        BatchLineFileHandle.writelines(lRows)
1554
1555
def _writeExpBatchLineFileSeveralBatches(self):
    """Write the expected batch-line CSV with individuals 1 and 2 in both batches.

    The output goes to self._expBatchLineFileName.
    """
    lRows = [
        "IndividualNumber;Pos5;Pos3;BatchNumber;Sequence\n",
        "1;;;1;\n",
        "2;;;1;\n",
        "1;;;2;\n",
        "2;;;2;\n",
    ]
    # The context manager closes the file even if a write fails.
    with open(self._expBatchLineFileName, "w") as BatchLineFileHandle:
        BatchLineFileHandle.writelines(lRows)
1564
def _writeExpBatchLineFileSeveralBatches_different_lines_between_files(self):
    """Write the expected batch-line CSV with individuals 1-2 in batch 1 and 3-4 in batch 2.

    The output goes to self._expBatchLineFileName.
    """
    lRows = [
        "IndividualNumber;Pos5;Pos3;BatchNumber;Sequence\n",
        "1;;;1;\n",
        "2;;;1;\n",
        "3;;;2;\n",
        "4;;;2;\n",
    ]
    # The context manager closes the file even if a write fails.
    with open(self._expBatchLineFileName, "w") as BatchLineFileHandle:
        BatchLineFileHandle.writelines(lRows)
1573
def _writeInputFile(self):
    """Write the basic multifasta input (reference, Line1, Line2) to self._inFileName.

    The last sequence line deliberately has no trailing newline.
    """
    lRecords = [
        ">Sequence_de_Reference\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    ]
    # The context manager closes the file even if a write fails.
    with open(self._inFileName, "w") as inFileHandle:
        inFileHandle.writelines(lRecords)
1583
def _writeInputFileWithSnpsAndIndels(self):
    """Write a multifasta input whose sequences contain gap characters ("-") to self._inFileName.

    The last sequence line deliberately has no trailing newline.
    """
    lRecords = [
        ">Sequence_de_Reference\n",
        "C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    ]
    # The context manager closes the file even if a write fails.
    with open(self._inFileName, "w") as inFileHandle:
        inFileHandle.writelines(lRecords)
1593
def _writeInputFileWithSeqErrorsInRefSeq(self):
    """Write a multifasta input whose reference sequence contains the character "7" to self._inFileName.

    The last sequence line deliberately has no trailing newline.
    """
    lRecords = [
        ">Sequence_de_Reference\n",
        "CCTA7GCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    ]
    # The context manager closes the file even if a write fails.
    with open(self._inFileName, "w") as inFileHandle:
        inFileHandle.writelines(lRecords)
1603
def _writeInputFileWithSeqErrorsInOneLineSeq(self):
    """Write a multi-FASTA in which the ``Line1`` sequence holds an ``X``.

    Overwrites ``self._inFileName`` with ``Sequence_de_Reference``,
    ``Line1`` and ``Line2``. Only the ``Line1`` sequence contains the
    character ``X``. The last sequence is written without a trailing
    newline.
    """
    fastaLines = (
        ">Sequence_de_Reference\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATXAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    # The context manager closes the handle even if a write fails.
    with open(self._inFileName, "w") as fastaFile:
        fastaFile.writelines(fastaLines)
1613
def _writeInputFileWithASeveralLineSeq(self):
    """Write a multi-FASTA in which one sequence spans several lines.

    Overwrites ``self._inFileName`` with ``Sequence_de_Reference``,
    ``Line1`` and ``Line2``. The first two sequences are each written on a
    single physical line, whereas the ``Line2`` sequence is split over
    three physical lines.
    """
    fastaLines = (
        ">Sequence_de_Reference\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA\n",
        ">Line2\n",
        # Embedded newlines split this sequence across three physical lines.
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA\nCGCAGTAGCCAAA\nCCTCCACAATA\n",
    )
    # The context manager closes the handle even if a write fails.
    with open(self._inFileName, "w") as fastaFile:
        fastaFile.writelines(fastaLines)
1623
1624
def _writeInputFileWithUpcaseAndLowcaseNucleotide(self):
    """Write a multi-FASTA mixing upper-case and lower-case nucleotides.

    Overwrites ``self._inFileName`` with ``Sequence_de_Reference``,
    ``Line1`` and ``Line2``. Every sequence contains a few lower-case
    letters among upper-case ones and is split over three physical lines.
    """
    fastaLines = (
        ">Sequence_de_Reference\n",
        "CCTAAGCCATTGCTTGGtGATTATGAAGgCAGTAGTCAAACCTCCACAATC\nCGCAGTAGCCAAA\nCCTCCACAATA\n",
        ">Line1\n",
        "CCTTAGCCATTGCtTGGTGACTATGAAGGcAGTAGGCAAACCTCCACAATC\nCGCAGTAGCCAAA\nCCTCCACAATA\n",
        ">Line2\n",
        "CCTAAGCCAtTGCTTGGTGACTATCaAGGCAGTAGCCAAACCTCCACAATA\nCGCAGTAGCCAAA\nCCTCCACAATA\n",
    )
    # The context manager closes the handle even if a write fails.
    with open(self._inFileName, "w") as fastaFile:
        fastaFile.writelines(fastaLines)
1634
def _writeInputFileWith2SeqsWithTheSameName(self):
    """Write a multi-FASTA that contains the ``Line2`` record twice.

    Overwrites ``self._inFileName`` with ``Sequence_de_Reference``,
    ``Line1`` and then two records that share both the header ``Line2``
    and the same sequence.
    """
    # The same header and sequence are emitted twice on purpose.
    duplicatedRecord = (
        ">Line2\n",
        "CCTAAGCCAtTGCTTGGTGACTATCaAGGCAGTAGCCAAACCTCCACAATA\n",
    )
    fastaLines = (
        ">Sequence_de_Reference\n",
        "CCTAAGCCATTGCTTGGtGATTATGAAGgCAGTAGTCAAACCTCCACAATC\nCGCAGTAGCCAAA\nCCTCCACAATA\n",
        ">Line1\n",
        "CCTTAGCCATTGCtTGGTGACTATGAAGGcAGTAGGCAAACCTCCACAATC\n",
    ) + duplicatedRecord + duplicatedRecord
    # The context manager closes the handle even if a write fails.
    with open(self._inFileName, "w") as fastaFile:
        fastaFile.writelines(fastaLines)
1646
1647 def _writeInputFileBatchWithPotentialDooblons(self):
1648 inFileHandle = open(self._inFileName, "w")
1649 inFileHandle.write(">AU247387ref\n")
1650 inFileHandle.write("CACTATAGCTCCTAACATTCCTGAAGTGAAGATCACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTACGTTCTGGTTACTCTTCAATTTGGGCATGCTTAATTATCTCCTCAATTTCAATTTGGCCATGCTTAATGTTGGGTGCTTTCTTTATAGCCTGCTCACCAACATGTGATCTGTTCTTTGTATGCTCAGGTGGTTGCATGGCTTCGTTCTCTTTAGCCTTCGCTGTTTGTGGCTTTGTTATGTGACCAAGCACTTGCTATACTGTCTATTTGTTCGCAGGTGATTGCAGGTCTGTGGATCCTCTGGGTTCTTTCTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTGTGCTGCAACAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCATACGAGTTGTGAACTGATGACATCCTCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTACGAGAAGTACGAGGACAAGATCGATGCTTTTGGAGAGAAG\n")
1651 inFileHandle.write(">10102\n")
1652 inFileHandle.write("NNNtatagctcctaacattcctgaagtgaagatcacrgaggacnnggctgtcaatgttgcccgctcgctgagatatgagatcaacaggggcttygctagcttgagggcgattggNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
1653 inFileHandle.write(">10954\n")
1654 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtcaatgttgcccgctcgctgagatatgagatcaacaggggctttgctagcctgagggcgattggtcaaggccgtgacctgaagaaattcctgattgtacgt---------------------------ttaat---------------------------------------------------------------------------------------------tggttgcatggcttcgttctctttagccttcgctgtttgtggctttgttatgtgaccaagcacttgctatactgtctatttgttcgcaggtgattgcaggtctgtggatcctct---------ctgcccttgggagctgctgcaatttcctcaccttgttctacataggtaatgtgcttcgctgctacagcctgaacttg--------cagatgtgcagtaactgtacctagcattgtttacccat------------------------tctcgctttcttacNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
1655 inFileHandle.write(">ABERAVON\n")
1656 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNggtcaaggccgtgacctgaagaaattcctgattgtacgt---------------------------ttaat---------------------------------------------------------------------------------------------tggttgcatggcttcgttctctttagccttcgctgtttgtggctttgttatgtgaccNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
1657 inFileHandle.write(">CARILLON\n")
1658 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNcaacattgcccgctcgctgagatatgagatcaacaggggcttctttactttgaaggagatcggtcagggccgtgatctgaagaaattcctcattgtatgttctggttactcttcaatttgggcatgcttaat---------------------------------gttgggtgctttctttat--cctgctcaccaacatgtgatctgttctttgtatgctcaggtggttgccgg---------------------------------------------------------------------------------------------------cctctgggttctttctgttcttgggagctcttgcaacttcttgacattggcatatataggtaat------------------tttaacttgtgctgcaacacttgagttcataaccaccctag------ttgtccatacgagttgtgaactgatgacatccgttctttttcccragtgcagtcttcgtggtgctctacacggtgccagttctgtatgaNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
1659 inFileHandle.write(">CONCERTO\n")
1660 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNctttgttatgtgaccaagcacttgctatactgtctatttgttcgcaggtgattgcaggtctgtggatcctct---------ctgcccttgggagctgctgcaatttcctcaccttgttctacataggtaatgtgcttcgctgctacagcctgaacttg--------cagatgtgcagtaactgtacctagcattgtttacccat------------------------tctcgctttcttacttgcagtcttcatggttctctacactgtgccNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
1661 inFileHandle.write(">F14-13\n")
1662 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtsaatgttgcccgctcgctgagatatgagatcaacaggggctttgctagcctgagggcgattggtcaaggccgtgacctgaagaaaNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
1663 inFileHandle.write(">GAGNY\n")
1664 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNcattgcccgctcgctgagatatgagatcaacaggggcttctttactttgaaggagatyggtcagggccgtgayctgaagaaattcctsattgtaygtNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
1665 inFileHandle.write(">GREECE\n")
1666 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtsaacattgcccgctcgctgagatatgagatcaacaggggcttctttactttgaaggagatyggycagggccgtgatctgaagaaattcctcattgtatgtNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
1667 inFileHandle.write(">IMAGINE\n")
1668 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtcaatgttgcccgctcgctgagatatgagatcaacaggggctttgctagcctgagggcgattggtcaaggccgtgacctgaagaaattcctgattgtacgt---------------------------ttaat---------------------------------------------------------------------------------------------tggttgcatggcttcgttctctttagccttcgctgtttgtggctttgttatgtgaccaagcacttgctatactgtctatttgttcgcaggtgattgcaggtctgtggatcctct---------ctgcccttgggagctgctgcaatttcctcaccttgttctacataggtaatgtgcttcgctgctacagcctgaacttg--------cagatgtgcagtaactgtacctagcattgtttacccat------------------------tctcgctttcttacttgcagtcttcatggttctctacactgtgccNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
1669 inFileHandle.write(">IRELAND\n")
1670 inFileHandle.write("NNNTATAGCTCCTAACATTCCTGAAGTGACGATTCCAGAGGACACGATTGTGAACATTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTCTTTACTTTGATGGAGATTGGCCAGGGCCGTGATCTGAAGAAATTCCTCATTGTATGT---------------------------TTGTTTATCTCCTCAATTTCAATTTGGCCATGCTTAATGTTGGGTGCTTTCTGTATAGCCTGCTCACCAAGGTGTGATCTCTTCTTTGTATACACAGGTGGTTGCTGG---------------------------------------------------------------------------------------------------CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACNTTGGCATATATAGGTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
1671 inFileHandle.write(">NEMOF\n")
1672 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtcaatgttgcccgctcgctgagatatgagatcaacaggggctttgctagcctgagggcgattggtcaaggccgtgacctgaagaaattcctgattgtacgt---------------------------ttaat---------------------------------------------------------------------------------------------tggttgcatggcttcgttctctttagccttcgctgtttgtggctttgttatgtgaccaagcacttgctatactgtctatttgttcgcaggtgattgcaggtctgtggatcctct---------ctgcccttgggNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
1673 inFileHandle.write(">NEMOH\n")
1674 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtcaatgttgcccgctcgctgagatatgagatcaacaggggctttgctagcctgagggcgattggtcaaggccgtgacctgaagaaattcctgattgtacgt---------------------------ttaat---------------------------------------------------------------------------------------------tggttgcatggcttcgttctctttagccttcgctgtttgtggctttgttatgtgaccaagcacttgctatactgtctatttgttcgcaggtgattgcaggtctgtggatcctct---------ctgcccttgggagctgctgcaatttcctcaccttgttctacataggtaatgtgcttcgctgctacagcctgaacttg--------cagatgtgcagtaactgtacctagcattgtttacccat------------------------tctcgctttcttacNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
1675 inFileHandle.write(">POLAND\n")
1676 inFileHandle.write("NNNTATAGCTCCTAACATTCCTGAAGTGAAGATCACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTAYGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGTTGCATGGCTTCGTTCTCTTTAGCCTTCGCTGTTTGTGGCTTTGTTATGTGACCAAGCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
1677 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
1678 inFileHandle.write(">SPAIN\n")
1679 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtcaacattgcccgctcgctgagatatgagatcaacaggggcttctttactttgaaggagatcggtcagggccgtgatctgaagaaattcctcattgtatgttctggttactcttcaatttgggcatgcttaat---------------------------------gttgggtgctttctttat--cctgctcaccaacatgtgatctgttctttgtatgctcaggtggttgccgg---------------------------------------------------------------------------------------------------cctctgggttctttctgttcttgggagctcttgcaacttcttgacattggcatatataggtaat------------------tttaacttgtgctgcaacacttgagttcataaccaccctag------ttgtccatacgagttgtgaactgatgacatccgttctttttcccgagtgcagtcttcgtggtgctctacacggtgccagttctgtatgagaagtacgacgacaaggttgatgcttttggtgagaag\n")
1680 inFileHandle.write(">TRANSATE\n")
1681 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNcgctcgctgagatatgagatcaacaggggcttctttactttgaaggagatYggccagggtcgcgacctcaagaaattcctcattgtatgttgcttgt-ctcttcaatttcaacatgcttgat---------------------------------gttgggtgctttctttat--cctgctcaccaacatgtgatctcttctttgtatgctcaggtggttgcggg---------------------------------------------------------------------------------------------------tctctgggttctttctgttcttgggagctcttgcaacttcttgacattggcatatataggtaaK------------------tataRcttgtgctgcaacacttgagttcataaccNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n")
1682 inFileHandle.write(">VIGOR\n")
1683 inFileHandle.write("NNNTATAGCTCCTAACATTCCTGAAGTGAAGATCACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTACGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGTTGCATGGCTTCGTTCTCTTTAGCCTTCGCTGTTTGTGGCTTTGTTATGTGACCAAGCACTTGCTATACTGTCTATTTGTTCGCAGGTGATTGCAGGTCTGTGGATCCTCT---------CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTACGAGAAGTACGAGGACAAGATCGATGCTTTTGGAGAGAAG\n")
1684 inFileHandle.close()
1685
def _writeRealExpAlleleFile(self):
    """Write the expected allele CSV listing five allele values.

    Overwrites ``self._expAlleleFileName`` with a header row followed by
    alleles numbered 1 to 5 whose values are ``G``, ``T``, ``A``, ``C`` and
    ``-``. The Motif, NbCopy and Comment columns are left empty.
    """
    expectedRows = (
        "AlleleNumber;Value;Motif;NbCopy;Comment\n",
        "1;G;;;\n",
        "2;T;;;\n",
        "3;A;;;\n",
        "4;C;;;\n",
        "5;-;;;\n",
    )
    # The context manager closes the handle even if a write fails.
    with open(self._expAlleleFileName, "w") as alleleFile:
        alleleFile.writelines(expectedRows)
1695
def _writeRealExpSequenceCSVFile(self):
    """Write the expected sequence CSV holding a single reference row.

    Overwrites ``self._expSequenceCSVFileName`` with a header row and one
    row describing ``PpHDZ31_ref`` as a ``Reference`` sequence of
    ``Pinus pinaster``; the bank, accession and locus columns are empty.
    """
    expectedRows = (
        "SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n",
        "PpHDZ31_ref;Reference;;;;;Pinus pinaster\n",
    )
    # The context manager closes the handle even if a write fails.
    with open(self._expSequenceCSVFileName, "w") as sequenceCsvFile:
        sequenceCsvFile.writelines(expectedRows)
1701
def _writeRealExpBatchFile(self):
    """Write the expected batch description file for the PpHDZ31 batch.

    Overwrites ``self._expBatchFileName`` with one ``Key: value`` line per
    batch field and a closing ``//`` line. The Description,
    AlignmentFileName and SeqName fields are written with an empty value
    (the key is followed by a single space).
    """
    expectedLines = (
        "BatchNumber: 1\n",
        "BatchName: INRA_Pinus_pinaster_HDZ31-1\n",
        "GeneName: PpHDZ31\n",
        "Description: \n",
        "ContactNumber: 1\n",
        "ProtocolNumber: 1\n",
        "ThematicNumber: 1\n",
        "RefSeqName: PpHDZ31_ref\n",
        "AlignmentFileName: \n",
        "SeqName: \n",
        "//\n",
    )
    # The context manager closes the handle even if a write fails.
    with open(self._expBatchFileName, "w") as batchFile:
        batchFile.writelines(expectedLines)
1716
1717
def _writeInputFileSeveralBatches(self):
    """Write two multi-FASTA files that share the same line names.

    Creates the directory ``self._inputDirSeveralBatches`` when
    ``FileUtils.isRessourceExists`` reports it missing, then overwrites
    ``Gene1.fasta`` and ``Gene2.fasta`` inside it. The files use different
    reference names (``Sequence_de_Reference1`` / ``Sequence_de_Reference2``)
    but both contain records named ``Line1`` and ``Line2``. The last
    sequence of each file is written without a trailing newline.
    """
    if not FileUtils.isRessourceExists(self._inputDirSeveralBatches):
        os.mkdir(self._inputDirSeveralBatches)

    gene1Lines = (
        ">Sequence_de_Reference1\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    gene2Lines = (
        ">Sequence_de_Reference2\n",
        "C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    # Context managers close each handle even if a write fails.
    with open(self._inputDirSeveralBatches + "/Gene1.fasta", "w") as gene1File:
        gene1File.writelines(gene1Lines)
    with open(self._inputDirSeveralBatches + "/Gene2.fasta", "w") as gene2File:
        gene2File.writelines(gene2Lines)
1739
def _writeInputFileSeveralBatches_different_lines_between_files(self):
    """Write two multi-FASTA files whose line names do not overlap.

    Creates the directory ``self._inputDirSeveralBatches`` when
    ``FileUtils.isRessourceExists`` reports it missing, then overwrites
    ``Gene1.fasta`` (``Sequence_de_Reference1``, ``Line1``, ``Line2``) and
    ``Gene2.fasta`` (``Sequence_de_Reference2``, ``Line3``, ``Line4``)
    inside it. The last sequence of each file is written without a
    trailing newline.
    """
    if not FileUtils.isRessourceExists(self._inputDirSeveralBatches):
        os.mkdir(self._inputDirSeveralBatches)

    gene1Lines = (
        ">Sequence_de_Reference1\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    gene2Lines = (
        ">Sequence_de_Reference2\n",
        "C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line3\n",
        "C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line4\n",
        "CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    # Context managers close each handle even if a write fails.
    with open(self._inputDirSeveralBatches + "/Gene1.fasta", "w") as gene1File:
        gene1File.writelines(gene1Lines)
    with open(self._inputDirSeveralBatches + "/Gene2.fasta", "w") as gene2File:
        gene2File.writelines(gene2Lines)
1761
def _writeInputFileSeveralBatches_different_lines_and_same_refseq_between_files(self):
    """Write two multi-FASTA files sharing a reference name but not line names.

    Creates the directory ``self._inputDirSeveralBatches`` when
    ``FileUtils.isRessourceExists`` reports it missing, then overwrites
    ``Gene1.fasta`` (``Line1``, ``Line2``) and ``Gene2.fasta`` (``Line3``,
    ``Line4``) inside it. Both files name their reference record
    ``Sequence_de_Reference1``, although the reference sequences differ.
    The last sequence of each file is written without a trailing newline.
    """
    if not FileUtils.isRessourceExists(self._inputDirSeveralBatches):
        os.mkdir(self._inputDirSeveralBatches)

    gene1Lines = (
        ">Sequence_de_Reference1\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    gene2Lines = (
        # Same reference header as Gene1.fasta, on purpose.
        ">Sequence_de_Reference1\n",
        "C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line3\n",
        "C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line4\n",
        "CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    # Context managers close each handle even if a write fails.
    with open(self._inputDirSeveralBatches + "/Gene1.fasta", "w") as gene1File:
        gene1File.writelines(gene1Lines)
    with open(self._inputDirSeveralBatches + "/Gene2.fasta", "w") as gene2File:
        gene2File.writelines(gene2Lines)
1783
1784
# Run this module's test cases when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()