Mercurial > repos > yufei-luo > s_mart
comparison smart_toolShed/commons/core/parsing/test/Test_Multifasta2SNPFile.py @ 0:e0f8dcca02ed
Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
author | yufei-luo |
---|---|
date | Thu, 17 Jan 2013 10:52:14 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e0f8dcca02ed |
---|---|
1 import os | |
2 import shutil | |
3 import unittest | |
4 from commons.core.utils.FileUtils import FileUtils | |
5 from commons.core.parsing.Multifasta2SNPFile import Multifasta2SNPFile | |
6 from commons.core.parsing.Multifasta2SNPFile import ReferenceBioseqAndLinesBioseqDBWrapper | |
7 from commons.core.seq.Bioseq import Bioseq | |
8 from commons.core.seq.BioseqDB import BioseqDB | |
9 from smac_pipe.tests.Utils4Test import Utils4Test | |
10 | |
11 | |
12 class Test_Multifasta2SNPFile(unittest.TestCase): | |
13 # TODO TEST LOGFILE | |
def setUp(self):
    """Enter the test directory and record every file name the tests rely on.

    The expected-result files and the multi-batch input directory are built
    as absolute paths under $REPET_PATH; the input file, the "data/..."
    fixtures and the observed output files are plain relative names.
    Raises KeyError if REPET_PATH is not set in the environment.
    """
    testDir = "%s/commons/core/parsing/test" % os.environ["REPET_PATH"]
    os.chdir(testDir + "/")

    # Input written by the individual tests.
    self._inFileName = "multifasta_input.fasta"

    # Expected-result files.
    self._expSubSNPFileName = "%s/expSubSNP.csv" % testDir
    self._expAlleleFileName = "%s/expAllele.csv" % testDir
    self._expIndividualFileName = "%s/expIndividual.csv" % testDir
    self._expSequenceFSAFileName = "%s/expSequences.fsa" % testDir
    self._expSequenceCSVFileName = "%s/expSequences.csv" % testDir
    self._expBatchFileName = "%s/expBatch.txt" % testDir
    self._expBatchLineFileName = "%s/expBatchLine.csv" % testDir

    # Fixtures stored under data/ (relative to the test directory).
    self._realInputFileName = "data/real_multifasta_input.fasta"
    self._realExpSubSNPFileName = "data/realExpSubSNP.csv"
    self._realExpSequenceFSAFileName = "data/realExpSequences.fsa"
    self._realExpBatchLineFileName = "data/realExpBatchLine.csv"
    self._realExpIndividualFileName = "data/realExpIndividual.csv"

    # Directory used by the runSeveralBatches tests.
    self._inputDirSeveralBatches = "%s/severalBatchDir" % testDir

    # Observed output file names.
    self._obsSubSNPFileName = "SubSNP.csv"
    self._obsAlleleFileName = "Allele.csv"
    self._obsIndividualFileName = "Individual.csv"
    self._obsSequenceFSAFileName = "Sequences.fsa"
    self._obsSequenceCSVFileName = "Sequences.csv"
    self._obsBatchFileName = "Batch.txt"
    self._obsBatchLineFileName = "BatchLine.csv"

    self._fileUtils = FileUtils()
44 | |
def tearDown(self):
    """Return to the test directory and delete whatever a test left behind.

    Each candidate is removed only if it currently exists, so a test that
    produced only some of the files is cleaned up without error.
    """
    os.chdir("%s/commons/core/parsing/test/" % os.environ["REPET_PATH"])

    # Same removal order as before: input, observed outputs, expected files, log.
    leftoverFiles = (
        self._inFileName,
        self._obsSubSNPFileName,
        self._obsSubSNPFileName + "_filtered",
        self._obsAlleleFileName,
        self._obsIndividualFileName,
        self._obsSequenceFSAFileName,
        self._obsSequenceCSVFileName,
        self._obsBatchFileName,
        self._obsBatchLineFileName,
        self._expSubSNPFileName,
        self._realExpSubSNPFileName + "_filtered",
        self._expAlleleFileName,
        self._expIndividualFileName,
        self._expSequenceFSAFileName,
        self._expSequenceCSVFileName,
        self._expBatchFileName,
        self._expBatchLineFileName,
        "multifasta2SNP.log",
    )
    for fileName in leftoverFiles:
        if self._fileUtils.isRessourceExists(fileName):
            os.remove(fileName)

    # The multi-batch directory is removed with all of its content.
    if self._fileUtils.isRessourceExists(self._inputDirSeveralBatches):
        shutil.rmtree(self._inputDirSeveralBatches)
88 | |
89 | |
def test_runOneBatch(self):
    """Run one batch and compare each of the seven outputs with its expected file."""
    self._writeInputFile()
    self._writeExpSubSNPFile()
    self._writeExpAlleleFile()
    self._writeExpIndividualFile()
    self._writeExpSequenceFile()
    self._writeExpBatchFile()
    self._writeExpBatchLineFile()

    converter = Multifasta2SNPFile("Arabidopsis thaliana", "Batch1", "methyltransferase")
    converter.runOneBatch(self._inFileName)

    # (expected, observed) pairs, checked in this order.
    expectedAndObserved = (
        (self._expAlleleFileName, self._obsAlleleFileName),
        (self._expIndividualFileName, self._obsIndividualFileName),
        (self._expSequenceFSAFileName, self._obsSequenceFSAFileName),
        (self._expSequenceCSVFileName, self._obsSequenceCSVFileName),
        (self._expBatchFileName, self._obsBatchFileName),
        (self._expBatchLineFileName, self._obsBatchLineFileName),
        (self._expSubSNPFileName, self._obsSubSNPFileName),
    )
    for expFileName, obsFileName in expectedAndObserved:
        self.assertTrue(FileUtils.isRessourceExists(obsFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
121 | |
def test_runOneBatch_with_a_real_input_file(self):
    """Run one batch on data/real_multifasta_input.fasta and compare the outputs."""
    # NOTE(review): these writers are defined outside this view; presumably
    # they produce the _expAllele/_expSequenceCSV/_expBatch files compared
    # below -- confirm.
    self._writeRealExpAlleleFile()
    self._writeRealExpSequenceCSVFile()
    self._writeRealExpBatchFile()

    converter = Multifasta2SNPFile("Pinus pinaster", "INRA_Pinus_pinaster_HDZ31-1", "PpHDZ31")
    converter.runOneBatch(self._realInputFileName)

    # (expected, observed) pairs, checked in this order.
    expectedAndObserved = (
        (self._realExpIndividualFileName, self._obsIndividualFileName),
        (self._realExpSequenceFSAFileName, self._obsSequenceFSAFileName),
        (self._expSequenceCSVFileName, self._obsSequenceCSVFileName),
        (self._expBatchFileName, self._obsBatchFileName),
        (self._realExpBatchLineFileName, self._obsBatchLineFileName),
        (self._expAlleleFileName, self._obsAlleleFileName),
        (self._realExpSubSNPFileName, self._obsSubSNPFileName),
    )
    for expFileName, obsFileName in expectedAndObserved:
        self.assertTrue(FileUtils.isRessourceExists(obsFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
150 | |
def test_runOneBatch_with_errors_in_refSeq(self):
    """runOneBatch must raise on the input written by _writeInputFileWithSeqErrorsInRefSeq."""
    self._writeInputFileWithSeqErrorsInRefSeq()
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "Batch1", "methyltransferase")
    # NOTE(review): every other test calls runOneBatch with a single argument.
    # If runOneBatch does not accept this second one, the resulting TypeError
    # alone satisfies the assertion -- confirm against its signature.
    runArgs = (self._inFileName, self._obsSubSNPFileName)
    self.assertRaises(Exception, converter.runOneBatch, *runArgs)
155 | |
def test_runOneBatch_with_errors_in_lineSeq(self):
    """runOneBatch must raise on the input written by _writeInputFileWithSeqErrorsInOneLineSeq."""
    self._writeInputFileWithSeqErrorsInOneLineSeq()
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "Batch1", "methyltransferase")
    # NOTE(review): every other test calls runOneBatch with a single argument.
    # If runOneBatch does not accept this second one, the resulting TypeError
    # alone satisfies the assertion -- confirm against its signature.
    runArgs = (self._inFileName, self._obsSubSNPFileName)
    self.assertRaises(Exception, converter.runOneBatch, *runArgs)
160 | |
def test_runOneBatch_with_a_several_lineSeq(self):
    """Run one batch on the 'several line seq' input and compare all outputs."""
    self._writeInputFileWithASeveralLineSeq()
    self._writeExpSubSNPFileSeveralLineSeq()
    self._writeExpAlleleFile()
    self._writeExpIndividualFile()
    self._writeExpSequenceFileSeveralLineSeq()
    self._writeExpBatchFile()
    self._writeExpBatchLineFile()

    converter = Multifasta2SNPFile("Arabidopsis thaliana", "Batch1", "methyltransferase")
    converter.runOneBatch(self._inFileName)

    # (expected, observed) pairs, checked in this order.
    expectedAndObserved = (
        (self._expSubSNPFileName, self._obsSubSNPFileName),
        (self._expAlleleFileName, self._obsAlleleFileName),
        (self._expIndividualFileName, self._obsIndividualFileName),
        (self._expSequenceFSAFileName, self._obsSequenceFSAFileName),
        (self._expSequenceCSVFileName, self._obsSequenceCSVFileName),
        (self._expBatchFileName, self._obsBatchFileName),
        (self._expBatchLineFileName, self._obsBatchLineFileName),
    )
    for expFileName, obsFileName in expectedAndObserved:
        self.assertTrue(FileUtils.isRessourceExists(obsFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
193 | |
def test_runOneBatch_with_2_seqs_with_the_same_name(self):
    """runOneBatch must raise SystemExit on the input written by
    _writeInputFileWith2SeqsWithTheSameName."""
    self._writeInputFileWith2SeqsWithTheSameName()
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")

    # Passes only when SystemExit is raised; any other exception propagates.
    self.assertRaises(SystemExit, converter.runOneBatch, self._inFileName)
208 | |
def test_runOneBatch_with_indels_and_snps(self):
    """Run one batch on the SNP-and-indel input and compare all outputs."""
    self._writeInputFileWithSnpsAndIndels()
    self._writeExpSubSNPFileWithSnpsAndIndels()
    self._writeExpAlleleFileWithSnpsAndIndels()
    self._writeExpIndividualFile()
    self._writeExpSequenceFileWithDeletion()
    self._writeExpBatchFile()
    self._writeExpBatchLineFile()

    converter = Multifasta2SNPFile("Arabidopsis thaliana", "Batch1", "methyltransferase")
    converter.runOneBatch(self._inFileName)

    # (expected, observed) pairs, checked in this order.
    expectedAndObserved = (
        (self._expIndividualFileName, self._obsIndividualFileName),
        (self._expSequenceFSAFileName, self._obsSequenceFSAFileName),
        (self._expSequenceCSVFileName, self._obsSequenceCSVFileName),
        (self._expBatchFileName, self._obsBatchFileName),
        (self._expBatchLineFileName, self._obsBatchLineFileName),
        (self._expAlleleFileName, self._obsAlleleFileName),
        (self._expSubSNPFileName, self._obsSubSNPFileName),
    )
    for expFileName, obsFileName in expectedAndObserved:
        self.assertTrue(FileUtils.isRessourceExists(obsFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
244 | |
def test_runOneBatchWithPotentialDooblons(self):
    """Compare the SubSNP output with data/ExpPotDooblonsSubSNP.csv after
    dropping three columns from both files.

    The intermediate "_filtered" files are removed in a ``finally`` clause so
    that a failing assertion no longer leaves them on disk (tearDown only
    removes the observed "SubSNP.csv_filtered").
    """
    self._writeInputFileBatchWithPotentialDooblons()

    batchName = "Batch_AU247387"
    taxon = "Arabidopsis thaliana"
    gene = "methyltransferase"
    multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene)
    multifasta2SNPFile.runOneBatch(self._inFileName)
    self.assertTrue(FileUtils.isRessourceExists(self._obsSubSNPFileName))

    expSubSNPFile = "data/ExpPotDooblonsSubSNP.csv"
    comparableExpSubSNPFile = expSubSNPFile + "_filtered_filtered_filtered"
    comparableObsSubSNPFile = self._obsSubSNPFileName + "_filtered_filtered_filtered"

    # Files created by the column-removal passes below, in removal order.
    intermediateFiles = (
        self._obsSubSNPFileName + "_filtered",
        expSubSNPFile + "_filtered",
        self._obsSubSNPFileName + "_filtered_filtered",
        expSubSNPFile + "_filtered_filtered",
        comparableExpSubSNPFile,
        comparableObsSubSNPFile,
    )

    try:
        # Each pass reads the previous pass's output; the helper appears to
        # write its result to "<input>_filtered" (the final existence checks
        # rely on that naming).
        for suffix, column in (("", 8), ("_filtered", 9), ("_filtered_filtered", 13)):
            Utils4Test.removeOneSpecifiedColumn(expSubSNPFile + suffix, ";", column)
            Utils4Test.removeOneSpecifiedColumn(self._obsSubSNPFileName + suffix, ";", column)

        self.assertTrue(FileUtils.isRessourceExists(comparableExpSubSNPFile))
        self.assertTrue(FileUtils.isRessourceExists(comparableObsSubSNPFile))
        self.assertTrue(FileUtils.are2FilesIdentical(comparableExpSubSNPFile, comparableObsSubSNPFile))
    finally:
        # Runs on success and on failure alike.
        for fileName in intermediateFiles:
            if self._fileUtils.isRessourceExists(fileName):
                os.remove(fileName)
287 | |
def test_runSeveralBatches(self):
    """Run several batches from a directory and compare the outputs written there."""
    self._writeInputFileSeveralBatches()
    self._writeExpSubSNPFileSeveralBatches()
    self._writeExpAlleleFileSeveralBatches()
    self._writeExpIndividualFile()
    self._writeExpSequenceSeveralBatches()
    self._writeExpBatchFileSeveralBatches()
    self._writeExpBatchLineFileSeveralBatches()

    converter = Multifasta2SNPFile("Arabidopsis thaliana")
    converter.runSeveralBatches(self._inputDirSeveralBatches)

    # (expected file, observed file name inside the batch directory) pairs.
    expectedAndObserved = (
        (self._expAlleleFileName, self._obsAlleleFileName),
        (self._expIndividualFileName, self._obsIndividualFileName),
        (self._expSequenceFSAFileName, self._obsSequenceFSAFileName),
        (self._expSequenceCSVFileName, self._obsSequenceCSVFileName),
        (self._expBatchFileName, self._obsBatchFileName),
        (self._expBatchLineFileName, self._obsBatchLineFileName),
        (self._expSubSNPFileName, self._obsSubSNPFileName),
    )
    for expFileName, obsName in expectedAndObserved:
        obsFileName = self._inputDirSeveralBatches + "/" + obsName
        self.assertTrue(FileUtils.isRessourceExists(obsFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
319 | |
def test_runSeveralBatches_different_lines_between_files(self):
    """Run several batches on the 'different lines between files' input and
    compare the outputs written in the batch directory."""
    self._writeInputFileSeveralBatches_different_lines_between_files()
    self._writeExpSubSNPFileSeveralBatches_different_lines_between_files()
    self._writeExpAlleleFileSeveralBatches()
    self._writeExpIndividualFile_different_lines_between_files()
    self._writeExpSequenceSeveralBatches()
    self._writeExpBatchFileSeveralBatches()
    self._writeExpBatchLineFileSeveralBatches_different_lines_between_files()

    converter = Multifasta2SNPFile("Arabidopsis thaliana")
    converter.runSeveralBatches(self._inputDirSeveralBatches)

    # (expected file, observed file name inside the batch directory) pairs.
    expectedAndObserved = (
        (self._expAlleleFileName, self._obsAlleleFileName),
        (self._expIndividualFileName, self._obsIndividualFileName),
        (self._expSequenceFSAFileName, self._obsSequenceFSAFileName),
        (self._expSequenceCSVFileName, self._obsSequenceCSVFileName),
        (self._expBatchFileName, self._obsBatchFileName),
        (self._expBatchLineFileName, self._obsBatchLineFileName),
        (self._expSubSNPFileName, self._obsSubSNPFileName),
    )
    for expFileName, obsName in expectedAndObserved:
        obsFileName = self._inputDirSeveralBatches + "/" + obsName
        self.assertTrue(FileUtils.isRessourceExists(obsFileName))
        self.assertTrue(FileUtils.are2FilesIdentical(expFileName, obsFileName))
351 | |
def test_runSeveralBatches_different_lines_and_same_refseq_between_files(self):
    """runSeveralBatches must raise on the 'same refseq between files' input.

    The previous form caught the exception and then called
    ``assertRaises(Exception, e)`` on the caught instance. Calling an
    exception instance raises TypeError, which satisfied the assertion, and
    nothing was asserted at all when no exception occurred, so the test could
    not fail. The assertion now wraps the call itself.
    """
    self._writeInputFileSeveralBatches_different_lines_and_same_refseq_between_files()
    self._writeExpSubSNPFileSeveralBatches_different_lines_between_files()
    self._writeExpAlleleFileSeveralBatches()
    self._writeExpIndividualFile_different_lines_between_files()
    self._writeExpSequenceSeveralBatchesForSameRefSeq()
    self._writeExpBatchFileSeveralBatchesForSameRefSeq()
    self._writeExpBatchLineFileSeveralBatches_different_lines_between_files()

    multifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana")
    # NOTE(review): the expected files written above are never compared;
    # this test only checks that an exception is raised.
    self.assertRaises(Exception, multifasta2SNPFile.runSeveralBatches, self._inputDirSeveralBatches)
366 | |
def test_detectSNPAndIndels(self):
    """detectSNPsAndIndels on two gap-free lines that differ from the
    reference at positions 3, 6 and 18."""
    batchName = "batch1"
    reference = Bioseq()
    alignedLines = BioseqDB()
    converter = Multifasta2SNPFile("Arabidopsis thaliana", batchName, "methyltransferase")

    reference.sequence = "ATTCGCGTATGCGTATGCTT"
    reference.header = "reference"
    alignedLines.setData([
        Bioseq("line1", "ATCCGCGTATGCGTATGATT"),
        Bioseq("line2", "ATTCGTGTATGCGTATGGTT"),
    ])

    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, alignedLines, converter._logFile, self._inFileName)
    converter._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"}
    converter.detectSNPsAndIndels(converter._wrapper)

    expAlleles = {'C': 1, 'A': 2, 'T': 3, 'G': 4}
    expSubSNPs = [
        {'subSNPName': batchName + "_SNP_3_line1", 'position': 3, 'lineName': 1, 'allele': 1, '5flank': "AT", '3flank': "CGCGTATGCGTATGATT", 'batchNumber': 1, 'confidenceValue': "A", 'type': "SNP", 'length': 1},
        {'subSNPName': batchName + "_SNP_3_line2", 'position': 3, 'lineName': 2, 'allele': 3, '5flank': "AT", '3flank': "CGTGTATGCGTATGGTT", 'batchNumber': 1, 'confidenceValue': "A", 'type': "SNP", 'length': 1},
        {'subSNPName': batchName + "_SNP_6_line2", 'position': 6, 'lineName': 2, 'allele': 3, '5flank': "ATTCG", '3flank': "GTATGCGTATGGTT", 'batchNumber': 1, 'confidenceValue': "A", 'type': "SNP", 'length': 1},
        {'subSNPName': batchName + "_SNP_6_line1", 'position': 6, 'lineName': 1, 'allele': 1, '5flank': "ATCCG", '3flank': "GTATGCGTATGATT", 'batchNumber': 1, 'confidenceValue': "A", 'type': "SNP", 'length': 1},
        {'subSNPName': batchName + "_SNP_18_line1", 'position': 18, 'lineName': 1, 'allele': 2, '5flank': "ATCCGCGTATGCGTATG", '3flank': "TT", 'batchNumber': 1, 'confidenceValue': "A", 'type': "SNP", 'length': 1},
        {'subSNPName': batchName + "_SNP_18_line2", 'position': 18, 'lineName': 2, 'allele': 4, '5flank': "ATTCGTGTATGCGTATG", '3flank': "TT", 'batchNumber': 1, 'confidenceValue': "A", 'type': "SNP", 'length': 1},
    ]
    expIndividuals = [
        {'individualNumber': 1, 'individualName': "line1", 'scientificName': "Arabidopsis thaliana"},
        {'individualNumber': 2, 'individualName': "line2", 'scientificName': "Arabidopsis thaliana"},
    ]

    # The expected list is passed through the converter's own sort before comparing.
    sortedExpSubSNPs = converter._sortSubSNPResultByBatchPositionAndLineName(expSubSNPs)
    self.assertEqual(sortedExpSubSNPs, converter._lSubSNPFileResults)
    self.assertEqual(expAlleles, converter._dAlleleFileResults)
    self.assertEqual(expIndividuals, converter._lIndividualFileResults)
399 | |
def test_detectSNPAndIndels_no_polym(self):
    """detectSNPsAndIndels yields no sub-SNP when both lines equal the reference."""
    reference = Bioseq()
    alignedLines = BioseqDB()
    converter = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")

    reference.sequence = "ATTCGCGTATGCGTATGCTT"
    reference.header = "reference"
    alignedLines.setData([
        Bioseq("line1", "ATTCGCGTATGCGTATGCTT"),
        Bioseq("line2", "ATTCGCGTATGCGTATGCTT"),
    ])

    wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, alignedLines, converter._logFile, self._inFileName)
    converter.detectSNPsAndIndels(wrapper)

    self.assertEqual([], converter._lSubSNPFileResults)
422 | |
def test_detectSNPAndIndels_with_only_dels(self):
    """detectSNPsAndIndels on two lines that carry gaps at the start of the reference."""
    batchName = "batch1"
    reference = Bioseq()
    alignedLines = BioseqDB()
    converter = Multifasta2SNPFile("Arabidopsis thaliana", batchName, "methyltransferase")

    reference.sequence = "ATTACCGAA"
    reference.header = "reference"
    alignedLines.setData([
        Bioseq("line1", "A--ACCGAA"),
        Bioseq("line2", "---ACCGAA"),
    ])

    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, alignedLines, converter._logFile, self._inFileName)
    converter._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"}
    converter.detectSNPsAndIndels(converter._wrapper)

    expAlleles = {'A--': 1, '---': 2}
    expSubSNPs = [
        {'subSNPName': batchName + "_DEL_1_line2", 'position': 1, 'lineName': 2, 'allele': 2, '5flank': "", '3flank': "ACCGAA", 'batchNumber': 1, 'confidenceValue': "A", 'type': "DELETION", 'length': 3},
        {'subSNPName': batchName + "_DEL_1_line1", 'position': 1, 'lineName': 1, 'allele': 1, '5flank': "", '3flank': "ACCGAA", 'batchNumber': 1, 'confidenceValue': "A", 'type': "DELETION", 'length': 3},
    ]
    expIndividuals = [
        {'individualNumber': 1, 'individualName': "line1", 'scientificName': "Arabidopsis thaliana"},
        {'individualNumber': 2, 'individualName': "line2", 'scientificName': "Arabidopsis thaliana"},
    ]

    self.assertEqual(expAlleles, converter._dAlleleFileResults)
    # The expected list is passed through the converter's own sort before comparing.
    sortedExpSubSNPs = converter._sortSubSNPResultByBatchPositionAndLineName(expSubSNPs)
    self.assertEqual(sortedExpSubSNPs, converter._lSubSNPFileResults)
    self.assertEqual(expIndividuals, converter._lIndividualFileResults)
451 | |
def test_detectSNPAndIndels_with_dels_and_snps(self):
    """detectSNPsAndIndels on four lines mixing leading gaps and one substitution."""
    batchName = "batch1"
    reference = Bioseq()
    alignedLines = BioseqDB()
    converter = Multifasta2SNPFile("Arabidopsis thaliana", batchName, "methyltransferase")

    reference.sequence = "ATTACCGAA"
    reference.header = "reference"
    alignedLines.setData([
        Bioseq("line1", "A--ACCGAA"),
        Bioseq("line2", "---ACCGAA"),
        Bioseq("line3", "ATTACCGGA"),
        Bioseq("line4", "----CCGAA"),
    ])

    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, alignedLines, converter._logFile, self._inFileName)
    converter._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"}
    converter.detectSNPsAndIndels(converter._wrapper)

    expAlleles = {'G': 1, 'A--A': 2, '---A': 3, '----': 4, 'ATTA': 5, 'A': 6}
    expSubSNPs = [
        {'subSNPName': batchName + "_DEL_1_line2", 'position': 1, 'lineName': 2, 'allele': 3, '5flank': "", '3flank': "CCGAA", 'batchNumber': 1, 'confidenceValue': "A", 'type': "DELETION", 'length': 4},
        {'subSNPName': batchName + "_DEL_1_line1", 'position': 1, 'lineName': 1, 'allele': 2, '5flank': "", '3flank': "CCGAA", 'batchNumber': 1, 'confidenceValue': "A", 'type': "DELETION", 'length': 4},
        {'subSNPName': batchName + "_SNP_8_line3", 'position': 8, 'lineName': 3, 'allele': 1, '5flank': "ATTACCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue': "A", 'type': "SNP", 'length': 1},
        {'subSNPName': batchName + "_SNP_8_line1", 'position': 8, 'lineName': 1, 'allele': 6, '5flank': "A--ACCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue': "A", 'type': "SNP", 'length': 1},
        {'subSNPName': batchName + "_SNP_8_line2", 'position': 8, 'lineName': 2, 'allele': 6, '5flank': "---ACCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue': "A", 'type': "SNP", 'length': 1},
        {'subSNPName': batchName + "_SNP_8_line4", 'position': 8, 'lineName': 4, 'allele': 6, '5flank': "----CCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue': "A", 'type': "SNP", 'length': 1},
        {'subSNPName': batchName + "_DEL_1_line4", 'position': 1, 'lineName': 4, 'allele': 4, '5flank': "", '3flank': "CCGAA", 'batchNumber': 1, 'confidenceValue': "A", 'type': "DELETION", 'length': 4},
        {'subSNPName': batchName + "_DEL_1_line3", 'position': 1, 'lineName': 3, 'allele': 5, '5flank': "", '3flank': "CCGGA", 'batchNumber': 1, 'confidenceValue': "A", 'type': "DELETION", 'length': 4},
    ]
    expIndividuals = [
        {'individualNumber': 1, 'individualName': "line1", 'scientificName': "Arabidopsis thaliana"},
        {'individualNumber': 2, 'individualName': "line2", 'scientificName': "Arabidopsis thaliana"},
        {'individualNumber': 3, 'individualName': "line3", 'scientificName': "Arabidopsis thaliana"},
        {'individualNumber': 4, 'individualName': "line4", 'scientificName': "Arabidopsis thaliana"},
    ]

    self.assertEqual(expAlleles, converter._dAlleleFileResults)
    # The expected list is passed through the converter's own sort before comparing.
    sortedExpSubSNPs = converter._sortSubSNPResultByBatchPositionAndLineName(expSubSNPs)
    self.assertEqual(sortedExpSubSNPs, converter._lSubSNPFileResults)
    self.assertEqual(expIndividuals, converter._lIndividualFileResults)
490 | |
def test_detectSNPAndIndels_with_only_inserts(self):
    """detectSNPsAndIndels when the reference itself carries a two-column gap
    that some lines fill."""
    batchName = "batch1"
    reference = Bioseq()
    alignedLines = BioseqDB()
    converter = Multifasta2SNPFile("Arabidopsis thaliana", batchName, "methyltransferase")

    reference.sequence = "A--ACCGAA"
    reference.header = "reference"
    alignedLines.setData([
        Bioseq("line1", "A--ACCGAA"),
        Bioseq("line2", "AG-ACCGAA"),
        Bioseq("line3", "ATTACCGAA"),
    ])

    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(reference, alignedLines, converter._logFile, self._inFileName)
    converter._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"}
    converter.detectSNPsAndIndels(converter._wrapper)

    expAlleles = {'G-': 1, 'TT': 2, '--': 3}
    expSubSNPs = [
        {'subSNPName': batchName + "_INS_1_line2", 'position': 1, 'lineName': 2, 'allele': 1, '5flank': "A", '3flank': "ACCGAA", 'batchNumber': 1, 'confidenceValue': "A", 'type': "INSERTION", 'length': 2},
        {'subSNPName': batchName + "_INS_1_line3", 'position': 1, 'lineName': 3, 'allele': 2, '5flank': "A", '3flank': "ACCGAA", 'batchNumber': 1, 'confidenceValue': "A", 'type': "INSERTION", 'length': 2},
        {'subSNPName': batchName + "_INS_1_line1", 'position': 1, 'lineName': 1, 'allele': 3, '5flank': "A", '3flank': "ACCGAA", 'batchNumber': 1, 'confidenceValue': "A", 'type': "INSERTION", 'length': 2},
    ]
    expIndividuals = [
        {'individualNumber': 1, 'individualName': "line1", 'scientificName': "Arabidopsis thaliana"},
        {'individualNumber': 2, 'individualName': "line2", 'scientificName': "Arabidopsis thaliana"},
        {'individualNumber': 3, 'individualName': "line3", 'scientificName': "Arabidopsis thaliana"},
    ]

    self.assertEqual(expAlleles, converter._dAlleleFileResults)
    # The expected list is passed through the converter's own sort before comparing.
    sortedExpSubSNPs = converter._sortSubSNPResultByBatchPositionAndLineName(expSubSNPs)
    self.assertEqual(sortedExpSubSNPs, converter._lSubSNPFileResults)
    self.assertEqual(expIndividuals, converter._lIndividualFileResults)
522 | |
def test_detectSNPAndIndels_with_snps_and_inserts(self):
    # Same gapped reference as the insert-only case, but line3 also carries
    # one substitution in the next-to-last column: SNP and INSERTION
    # sub-SNPs are expected for each of the three lines.
    batchName = "batch1"
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", batchName, "methyltransferase")

    iRefBioseq = Bioseq()
    iRefBioseq.sequence = "A--ACCGAA"
    iRefBioseq.header = "reference"

    iLinesBioseqDB = BioseqDB()
    iLinesBioseqDB.setData([Bioseq("line1", "A--ACCGAA"),
                            Bioseq("line2", "AG-ACCGAA"),
                            Bioseq("line3", "ATTACCGCA")])

    iMultifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(iRefBioseq, iLinesBioseqDB, iMultifasta2SNPFile._logFile, self._inFileName)
    iMultifasta2SNPFile._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"}
    iMultifasta2SNPFile.detectSNPsAndIndels(iMultifasta2SNPFile._wrapper)

    dTypeByTag = {"SNP": "SNP", "INS": "INSERTION"}

    def expSubSNP(tag, position, lineNumber, allele, flank5, flank3, length):
        # Builds one expected sub-SNP record; the name embeds tag, position and line.
        return {'subSNPName': "%s_%s_%d_line%d" % (batchName, tag, position, lineNumber),
                'position': position, 'lineName': lineNumber, 'allele': allele,
                '5flank': flank5, '3flank': flank3, 'batchNumber': 1,
                'confidenceValue': "A", 'type': dTypeByTag[tag], 'length': length}

    dExpAllele = {'C': 1, 'G-': 2, 'TT': 3, '--': 4, 'A': 5}
    lExpSNP = [expSubSNP("SNP", 6, 3, 1, "ATTACCG", "A", 1),
               expSubSNP("SNP", 6, 1, 5, "A--ACCG", "A", 1),
               expSubSNP("SNP", 6, 2, 5, "AG-ACCG", "A", 1),
               expSubSNP("INS", 1, 2, 2, "A", "ACCGAA", 2),
               expSubSNP("INS", 1, 3, 3, "A", "ACCGCA", 2),
               expSubSNP("INS", 1, 1, 4, "A", "ACCGAA", 2)]
    lExpIndividual = [{'individualNumber': number, 'individualName': "line%d" % number, 'scientificName': "Arabidopsis thaliana"}
                      for number in (1, 2, 3)]

    self.assertEqual(dExpAllele, iMultifasta2SNPFile._dAlleleFileResults)
    lSortedExpSNP = iMultifasta2SNPFile._sortSubSNPResultByBatchPositionAndLineName(lExpSNP)
    self.assertEqual(lSortedExpSNP, iMultifasta2SNPFile._lSubSNPFileResults)
    self.assertEqual(lExpIndividual, iMultifasta2SNPFile._lIndividualFileResults)
557 | |
def test_detectSNPAndIndels_with_snps_inserts_and_dels(self):
    # Combined case: a gap in the reference (insertions), a substitution in
    # line3 (SNP) and trailing gaps in line2/line3 (deletions). Three
    # records of each kind are expected, one per line.
    batchName = "batch1"
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", batchName, "methyltransferase")

    iRefBioseq = Bioseq()
    iRefBioseq.sequence = "A--ACCGAATATAC"
    iRefBioseq.header = "reference"

    iLinesBioseqDB = BioseqDB()
    iLinesBioseqDB.setData([Bioseq("line1", "A--ACCGAATATAC"),
                            Bioseq("line2", "AG-ACCGAAT--AC"),
                            Bioseq("line3", "ATTACCGCA-----")])

    iMultifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(iRefBioseq, iLinesBioseqDB, iMultifasta2SNPFile._logFile, self._inFileName)
    iMultifasta2SNPFile._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"}
    iMultifasta2SNPFile.detectSNPsAndIndels(iMultifasta2SNPFile._wrapper)

    dTypeByTag = {"SNP": "SNP", "INS": "INSERTION", "DEL": "DELETION"}

    def expSubSNP(tag, position, lineNumber, allele, flank5, flank3, length):
        # Builds one expected sub-SNP record; the name embeds tag, position and line.
        return {'subSNPName': "%s_%s_%d_line%d" % (batchName, tag, position, lineNumber),
                'position': position, 'lineName': lineNumber, 'allele': allele,
                '5flank': flank5, '3flank': flank3, 'batchNumber': 1,
                'confidenceValue': "A", 'type': dTypeByTag[tag], 'length': length}

    dExpAllele = {'C': 1, 'G-': 2, 'T--AC': 3, 'TT': 4, '-----': 5, '--': 6, 'TATAC': 7, 'A': 8}
    lExpSNP = [expSubSNP("SNP", 6, 3, 1, "ATTACCG", "A-----", 1),
               expSubSNP("SNP", 6, 1, 8, "A--ACCG", "ATATAC", 1),
               expSubSNP("SNP", 6, 2, 8, "AG-ACCG", "AT--AC", 1),

               expSubSNP("INS", 1, 2, 2, "A", "ACCGAAT--AC", 2),
               expSubSNP("INS", 1, 3, 4, "A", "ACCGCA-----", 2),
               expSubSNP("INS", 1, 1, 6, "A", "ACCGAATATAC", 2),

               expSubSNP("DEL", 8, 2, 3, "AG-ACCGAA", "", 5),
               expSubSNP("DEL", 8, 3, 5, "ATTACCGCA", "", 5),
               expSubSNP("DEL", 8, 1, 7, "A--ACCGAA", "", 5)]
    lExpIndividual = [{'individualNumber': number, 'individualName': "line%d" % number, 'scientificName': "Arabidopsis thaliana"}
                      for number in (1, 2, 3)]

    self.assertEqual(dExpAllele, iMultifasta2SNPFile._dAlleleFileResults)
    lSortedExpSNP = iMultifasta2SNPFile._sortSubSNPResultByBatchPositionAndLineName(lExpSNP)
    self.assertEqual(lSortedExpSNP, iMultifasta2SNPFile._lSubSNPFileResults)
    self.assertEqual(lExpIndividual, iMultifasta2SNPFile._lIndividualFileResults)
597 | |
def test_createWrapperFromFile_with_upcase_and_lowcase_nucleotide(self):
    # The wrapper built from the input file must hold the same reference and
    # line sequences as a wrapper assembled by hand from upper-case sequences.
    # NOTE(review): the helper below presumably writes self._inFileName with
    # mixed-case nucleotides - confirm against its definition.
    self._writeInputFileWithUpcaseAndLowcaseNucleotide()
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")

    expRefBioseq = Bioseq("Sequence_de_Reference",
                          "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA")
    expLineBioseqDB = BioseqDB()
    for header, sequence in [("Line1", "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA"),
                             ("Line2", "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATACGCAGTAGCCAAACCTCCACAATA")]:
        expLineBioseqDB.add(Bioseq(header, sequence))

    expWrapper = ReferenceBioseqAndLinesBioseqDBWrapper(expRefBioseq, expLineBioseqDB, iMultifasta2SNPFile._logFile, self._inFileName)
    obsWrapper = iMultifasta2SNPFile.createWrapperFromFile(self._inFileName)

    self.assertEqual(obsWrapper._iReferenceBioseq, expWrapper._iReferenceBioseq)
    self.assertEqual(obsWrapper._iLinesBioseqDB, expWrapper._iLinesBioseqDB)
619 | |
def test_checkHeaderAlphabet(self):
    # Letters of either case, digits, '_', ':' and '-' must all be accepted
    # in a header.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    for strToBeCheck in ("abcdefghijklmnopqrstuvwxyz0912834567_:-",
                         "ABCDEFGHIJKLMNOPQRSTUVWXYZ0912834567_:-"):
        self.assertTrue(iMultifasta2SNPFile.checkHeaderAlphabet(strToBeCheck))
630 | |
def test_checkHeaderAlphabet_empty_string(self):
    # An empty header must be rejected.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    isValid = iMultifasta2SNPFile.checkHeaderAlphabet("")
    self.assertFalse(isValid)
638 | |
def test_checkHeaderAlphabet_space(self):
    # A header made of a single blank must be rejected.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    isValid = iMultifasta2SNPFile.checkHeaderAlphabet(" ")
    self.assertFalse(isValid)
646 | |
def test_checkHeaderAlphabet_non_aphabetical(self):
    # A punctuation character outside the header alphabet must be rejected.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    isValid = iMultifasta2SNPFile.checkHeaderAlphabet("}")
    self.assertFalse(isValid)
654 | |
def test_isDNA_bases(self):
    # A sequence made only of the letters A, C, G and T is accepted.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    isDNA = iMultifasta2SNPFile.isDNA_bases("TGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCGACTAATCAACAATATAATGCGAGTAGAGCTTGA")
    self.assertTrue(isDNA)
662 | |
def test_isDNA_bases_non_DNA_letter(self):
    # One letter that is not a DNA base ('X') makes the sequence invalid.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    isDNA = iMultifasta2SNPFile.isDNA_bases("XTAGTTGATCA")
    self.assertFalse(isDNA)
670 | |
def test_isDNA_bases_carriage_return(self):
    # A newline embedded in the sequence makes it invalid.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    isDNA = iMultifasta2SNPFile.isDNA_bases("TA\nGTTGATCA")
    self.assertFalse(isDNA)
678 | |
def test_isDNA_bases_empty_string(self):
    # An empty sequence is not considered DNA.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    isDNA = iMultifasta2SNPFile.isDNA_bases("")
    self.assertFalse(isDNA)
686 | |
def test_isDNA_bases_space(self):
    # A single blank is not considered DNA.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    isDNA = iMultifasta2SNPFile.isDNA_bases(" ")
    self.assertFalse(isDNA)
694 | |
def test_isDNA_bases_IUPAC_letter_but_non_DNA_bases(self):
    # IUPAC nucleotide codes other than the plain DNA bases are rejected.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    isDNA = iMultifasta2SNPFile.isDNA_bases("UMWSB")
    self.assertFalse(isDNA)
702 | |
def test_getLineAsAHeader(self):
    # A well-formed header line loses its '>' tag and has its blanks
    # replaced by underscores.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    obsHeader = iMultifasta2SNPFile.getLineAsAHeader(">test on good header")
    expHeader = "test_on_good_header"
    self.assertEqual(obsHeader, expHeader)
712 | |
def test_getLineAsAHeader_warning_bad_header_tag_omitted(self):
    # Feeds getLineAsAHeader a header line whose leading '>' tag is missing.
    # NOTE(review): this test cannot fail on the header logic. Nothing is
    # asserted when no exception is raised, and when one is, assertRaises is
    # handed the exception instance (not a callable), so it is normally
    # satisfied by the TypeError of calling that instance. Confirm whether a
    # missing tag should raise or only warn, then assert that explicitly.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    try:
        iMultifasta2SNPFile.getLineAsAHeader("test on bad header with tag omitted")
    except Exception as e:
        self.assertRaises(Exception, e, self._inFileName, self._obsSubSNPFileName)
724 | |
def test_getLineAsAHeader_warning_repeated_blanks_removed(self):
    # A run of whitespace inside the header (a blank followed by a tab) is
    # expected to come back as a single underscore.
    #
    # No assertRaises here: the header is expected to be returned, not
    # rejected. Handing assertRaises the *result* of the call (a str) would
    # only "pass" because calling a str raises TypeError, which says nothing
    # about getLineAsAHeader.
    lineToBeCheck = ">test on header \twith warning"
    expHeader = "test_on_header_with_warning"
    multifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    obsHeader = multifasta2SNPFile.getLineAsAHeader(lineToBeCheck)
    self.assertEqual(obsHeader, expHeader)
736 | |
def test_getLineAsAHeader_fatal_error_bad_header(self):
    # A header holding a backslash is expected to be refused with an
    # exception.
    #
    # assertRaises receives the callable and its argument, so the test fails
    # when no exception is raised. Wrapping the call in try/except and
    # asserting inside the handler would pass in both cases.
    # The backslash is written '\\' to avoid an invalid '\o' escape; the
    # runtime string is unchanged.
    lineToBeCheck = ">test\\on bad header with fatal error"
    multifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    self.assertRaises(Exception, multifasta2SNPFile.getLineAsAHeader, lineToBeCheck)
748 | |
def test_isHeaderInRefSeqList(self):
    # Looks up "line1" in a reference-sequence list that contains a Bioseq
    # with that header.
    # NOTE(review): nothing is asserted on the returned value, and the
    # except branch hands assertRaises an exception instance instead of a
    # callable, so this test passes whatever isHeaderInRefSeqList does.
    # Decide on the expected outcome (return value or exception) and assert it.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    iMultifasta2SNPFile._lRefSequences = [Bioseq("line1", "A--ACCGAATATAC"),
                                          Bioseq("line2", "AG-ACCGAAT--AC"),
                                          Bioseq("line3", "ATTACCGCA-----")]
    try:
        iMultifasta2SNPFile.isHeaderInRefSeqList("line1")
    except Exception as e:
        self.assertRaises(Exception, e)
765 | |
def test_completeAlleleSetWithCurrentAllele_one_allele_added(self):
    # An allele missing from the set is added with the next free number.
    dAlleleSetInInput = {"A": 1, "T": 2, "G": 3}
    dAlleleExpSet = {"A": 1, "T": 2, "G": 3, "C": 4}
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    dAlleleObsSet = iMultifasta2SNPFile._completeAlleleSetWithCurrentAllele(dAlleleSetInInput, "C")
    self.assertEqual(dAlleleObsSet, dAlleleExpSet)
781 | |
def test_completeAlleleSetWithCurrentAllele_no_allele_added(self):
    # An allele already present leaves the set as it was.
    dAlleleSetInInput = {"A": 1, "T": 2, "G": 3}
    dAlleleExpSet = {"A": 1, "T": 2, "G": 3}
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    dAlleleObsSet = iMultifasta2SNPFile._completeAlleleSetWithCurrentAllele(dAlleleSetInInput, "T")
    self.assertEqual(dAlleleObsSet, dAlleleExpSet)
796 | |
def test_completeAlleleSetWithCurrentAllele_with_an_empty_allele_set(self):
    # The first allele put into an empty set gets number 1.
    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    dAlleleObsSet = iMultifasta2SNPFile._completeAlleleSetWithCurrentAllele({}, "T")
    dAlleleExpSet = {"T": 1}
    self.assertEqual(dAlleleObsSet, dAlleleExpSet)
807 | |
def test_completeBatchLineListWithCurrentIndividual(self):
    # Adding "Individual3" to a batch-line list that already holds
    # individuals 1 and 2 must append an entry for individual 3.
    #TODO: this test only pass with a batchNumber of 1
    iCurrentBatchNumber = 1
    lIndividualResults = [{'individualNumber': number, 'individualName': "Individual%d" % number, 'scientificName': "Arabidopsis thaliana"}
                          for number in (1, 2, 3)]
    # Individual numbers are strings in batch-line entries.
    lBatchLineResults = [{'IndividualNumber': str(number), 'BatchNumber': iCurrentBatchNumber}
                         for number in (1, 2)]
    lExpBatchLineResults = [{'IndividualNumber': str(number), 'BatchNumber': iCurrentBatchNumber}
                            for number in (1, 2, 3)]

    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    lObsBatchLineResults = iMultifasta2SNPFile._completeBatchLineListWithCurrentIndividual(lBatchLineResults, lIndividualResults, "Individual3")
    self.assertEqual(lObsBatchLineResults, lExpBatchLineResults)
826 | |
def test_completeBatchLineListWithCurrentIndividual_no_entries_in_batchline_results_in_input(self):
    # Starting from an empty batch-line list, adding "Individual2" must
    # yield a single entry for individual 2 in batch 1.
    lIndividualResults = [{'individualNumber': number, 'individualName': "Individual%d" % number, 'scientificName': "Arabidopsis thaliana"}
                          for number in (1, 2, 3)]
    lExpBatchLineResults = [{'IndividualNumber': "2", 'BatchNumber': 1}]

    iMultifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    lObsBatchLineResults = iMultifasta2SNPFile._completeBatchLineListWithCurrentIndividual([], lIndividualResults, "Individual2")
    self.assertEqual(lObsBatchLineResults, lExpBatchLineResults)
840 | |
def test_completeBatchLineListWithCurrentIndividual_no_individual_in_individualList(self):
    # With an empty individual list, the line to add cannot be resolved and
    # an exception is expected.
    #
    # assertRaises receives the callable and its arguments, so the test
    # fails when no exception is raised. A try/except with an assertion
    # inside the handler would pass in both cases.
    lBatchLineResults = [{'IndividualNumber': "1", 'BatchNumber': 1},
                         {'IndividualNumber': "2", 'BatchNumber': 1}]
    lIndividualResults = []
    lineName2Add = "Individual3"

    multifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    self.assertRaises(Exception,
                      multifasta2SNPFile._completeBatchLineListWithCurrentIndividual,
                      lBatchLineResults, lIndividualResults, lineName2Add)
855 | |
def test_completeBatchLineListWithCurrentIndividual_individual_added_has_no_individual_number(self):
    # The individual to add is listed, but its record lacks the
    # 'individualNumber' key: an exception is expected.
    #
    # assertRaises receives the callable and its arguments, so the test
    # fails when no exception is raised. A try/except with an assertion
    # inside the handler would pass in both cases.
    lBatchLineResults = [{'IndividualNumber': "1", 'BatchNumber': "1"},
                         {'IndividualNumber': "2", 'BatchNumber': "1"}]
    lIndividualResults = [{'individualNumber': 1, 'individualName': "Individual1", 'scientificName': "Arabidopsis thaliana"},
                          {'individualNumber': 2, 'individualName': "Individual2", 'scientificName': "Arabidopsis thaliana"},
                          {'individualName': "Individual3", 'scientificName': "Arabidopsis thaliana"}]
    lineName2Add = "Individual3"

    multifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    self.assertRaises(Exception,
                      multifasta2SNPFile._completeBatchLineListWithCurrentIndividual,
                      lBatchLineResults, lIndividualResults, lineName2Add)
872 | |
def test_completeBatchLineListWithCurrentIndividual_individual_not_present_in_individualList(self):
    # The line name to add matches no record of the individual list: an
    # exception is expected.
    #
    # assertRaises receives the callable and its arguments, so the test
    # fails when no exception is raised. A try/except with an assertion
    # inside the handler would pass in both cases.
    lBatchLineResults = [{'IndividualNumber': "1", 'BatchNumber': "1"},
                         {'IndividualNumber': "2", 'BatchNumber': "1"}]
    lIndividualResults = [{'individualNumber': 1, 'individualName': "Individual1", 'scientificName': "Arabidopsis thaliana"},
                          {'individualNumber': 2, 'individualName': "Individual2", 'scientificName': "Arabidopsis thaliana"},
                          {'individualNumber': 3, 'individualName': "Individual3", 'scientificName': "Arabidopsis thaliana"}]
    lineName2Add = "Michael Corleone"

    multifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "batch1", "methyltransferase")
    self.assertRaises(Exception,
                      multifasta2SNPFile._completeBatchLineListWithCurrentIndividual,
                      lBatchLineResults, lIndividualResults, lineName2Add)
889 | |
def test_findASubSNPInAListWithHisName(self):
    # Searching by name must return both the matching record and its index
    # in the list (here the second entry, index 1).
    def subSNP(name, position, lineNumber, allele, snpType):
        # All records of this test share batch number and confidence value.
        return {'subSNPName': name, 'position': position, 'lineName': lineNumber, 'allele': allele,
                'batchNumber': 1, 'confidenceValue': "A", 'type': snpType}

    lSubSNPList = [subSNP("SubSNP_batch1_1_line2", 1, 2, 2, "DELETION"),
                   subSNP("SubSNP_batch1_2_line1", 1, 1, 1, "DELETION"),
                   subSNP("SubSNP_batch1_6_line1", 6, 1, 3, "SNP")]
    dExpSubSNP = subSNP("SubSNP_batch1_2_line1", 1, 1, 1, "DELETION")
    expIndice = 1

    iMultifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    dObsSubSNP, obsIndice = iMultifasta2SNPFile.findASubSNPInAListWithHisName("SubSNP_batch1_2_line1", lSubSNPList)

    self.assertEqual(expIndice, obsIndice)
    self.assertEqual(dExpSubSNP, dObsSubSNP)
905 | |
def test_findASubSNPInAListWithHisName_SubSNP_not_found(self):
    # Searching for a name carried by no record is expected to raise.
    #
    # assertRaises receives the callable and its arguments, so the test
    # fails when no exception is raised. A try/except with an assertion
    # inside the handler would pass in both cases.
    lSubSNPList = [{'subSNPName': "SubSNP_batch1_1_line2", 'position': 1, 'lineName': 2, 'allele': 2, 'batchNumber': 1, 'confidenceValue': "A", 'type': "DELETION"},
                   {'subSNPName': "SubSNP_batch1_2_line1", 'position': 1, 'lineName': 1, 'allele': 1, 'batchNumber': 1, 'confidenceValue': "A", 'type': "DELETION"},
                   {'subSNPName': "SubSNP_batch1_6_line1", 'position': 6, 'lineName': 1, 'allele': 3, 'batchNumber': 1, 'confidenceValue': "A", 'type': "SNP"}]
    name = "SubSNP_fake"

    multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    self.assertRaises(Exception, multifasta2SNPFile.findASubSNPInAListWithHisName, name, lSubSNPList)
918 | |
def test_clusteriseIndels(self):
    # indel1 (1-6) and indel3 (5-10) overlap: both must end up with the
    # merged bounds 1-10, while indel2 keeps its own bounds.
    def indel(name, start, end):
        return {'name': name, 'start': start, 'end': end}

    iMultifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    lIndelsList = [indel("indel1", 1, 6), indel("indel2", 12, 15), indel("indel3", 5, 10)]
    dIndel = {'start': 1, 'end': 6}

    lObsIndelsList = iMultifasta2SNPFile.clusteriseIndels(dIndel, lIndelsList)
    lExpIndelsList = [indel("indel1", 1, 10), indel("indel2", 12, 15), indel("indel3", 1, 10)]

    self.assertEqual(lExpIndelsList, lObsIndelsList)
932 | |
def test_clusteriseIndels_no_overlap(self):
    # None of the indels overlap, so the list must come back unchanged.
    def indel(name, start, end):
        return {'name': name, 'start': start, 'end': end}

    iMultifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    lIndelsList = [indel("indel1", 1, 6), indel("indel2", 12, 15), indel("indel3", 25, 30)]
    dIndel = {'start': 1, 'end': 6}

    lObsIndelsList = iMultifasta2SNPFile.clusteriseIndels(dIndel, lIndelsList)
    lExpIndelsList = [indel("indel1", 1, 6), indel("indel2", 12, 15), indel("indel3", 25, 30)]

    self.assertEqual(lExpIndelsList, lObsIndelsList)
946 | |
def test_clusteriseIndels_many_overlaps_complicated(self):
    # The four indels are chained by overlaps (1-6, 5-10, 9-40, 12-15), so
    # every one of them must end up with the bounds 1-40.
    def indel(name, start, end):
        return {'name': name, 'start': start, 'end': end}

    iMultifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    lIndelsList = [indel("indel1", 1, 6), indel("indel2", 12, 15),
                   indel("indel3", 5, 10), indel("indel4", 9, 40)]
    dIndel = {'start': 5, 'end': 10}

    lObsIndelsList = iMultifasta2SNPFile.clusteriseIndels(dIndel, lIndelsList)
    lExpIndelsList = [indel(name, 1, 40) for name in ("indel1", "indel2", "indel3", "indel4")]

    self.assertEqual(lExpIndelsList, lObsIndelsList)
962 | |
def test_updateBoundsForAnIndelInAnIndelList(self):
    # Only the entry named like the given indel ("indel2") takes the new
    # bounds; the other entries are left untouched.
    def indel(name, start, end):
        return {'name': name, 'start': start, 'end': end}

    lIndelsList = [indel("indel1", 1, 6), indel("indel2", 12, 15),
                   indel("indel3", 5, 10), indel("indel4", 9, 40)]
    dIndelWithNewBounds = indel("indel2", 7, 19)

    iMultifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    lObsNewIndelsList = iMultifasta2SNPFile.updateBoundsForAnIndelInAnIndelList(lIndelsList, dIndelWithNewBounds)

    lExpNewIndelsList = [indel("indel1", 1, 6), indel("indel2", 7, 19),
                         indel("indel3", 5, 10), indel("indel4", 9, 40)]
    self.assertEqual(lExpNewIndelsList, lObsNewIndelsList)
976 | |
def test_updateBoundsForAnIndelInAnIndelList_no_update_to_do(self):
    # The "new" bounds equal the current ones, so the list must come back
    # with the same content.
    def indel(name, start, end):
        return {'name': name, 'start': start, 'end': end}

    lIndelsList = [indel("indel1", 1, 6), indel("indel2", 12, 15),
                   indel("indel3", 5, 10), indel("indel4", 9, 40)]
    dIndelWithNewBounds = indel("indel2", 12, 15)

    iMultifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    lObsNewIndelsList = iMultifasta2SNPFile.updateBoundsForAnIndelInAnIndelList(lIndelsList, dIndelWithNewBounds)

    lExpNewIndelsList = [indel("indel1", 1, 6), indel("indel2", 12, 15),
                         indel("indel3", 5, 10), indel("indel4", 9, 40)]
    self.assertEqual(lExpNewIndelsList, lObsNewIndelsList)
990 | |
def test_updateBoundsForAnIndelInAnIndelList_indel_2_update_does_not_exist(self):
    # Updating an indel whose name appears nowhere in the list is expected
    # to raise.
    #
    # assertRaises receives the callable and its arguments, so the test
    # fails when no exception is raised. A try/except with an assertion
    # inside the handler would pass in both cases.
    lIndelsList = [{'name': "indel1", 'start': 1, 'end': 6},
                   {'name': "indel2", 'start': 12, 'end': 15},
                   {'name': "indel3", 'start': 5, 'end': 10},
                   {'name': "indel4", 'start': 9, 'end': 40}]
    dIndelWithNewBounds = {'name': "DeNiro", 'start': 12, 'end': 15}

    multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    self.assertRaises(Exception,
                      multifasta2SNPFile.updateBoundsForAnIndelInAnIndelList,
                      lIndelsList, dIndelWithNewBounds)
1002 | |
def test_mergeBoundsFor2Indels(self):
    # Two overlapping indels (1-4 and 2-15) must both come back with the
    # union of their bounds, 1-15.
    iMultifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    dObsIndel1, dObsIndel2 = iMultifasta2SNPFile.mergeBoundsForTwoOverlappingIndels({'start': 1, 'end': 4},
                                                                                   {'start': 2, 'end': 15})
    dExpMergedIndel = {'start': 1, 'end': 15}
    self.assertEqual(dExpMergedIndel, dObsIndel1)
    self.assertEqual(dExpMergedIndel, dObsIndel2)
1012 | |
def test_mergeBoundsFor2Indels_no_overlap(self):
    # Two indels that do not overlap (1-4 and 5-15) must keep their bounds.
    iMultifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    dObsIndel1, dObsIndel2 = iMultifasta2SNPFile.mergeBoundsForTwoOverlappingIndels({'start': 1, 'end': 4},
                                                                                   {'start': 5, 'end': 15})
    self.assertEqual({'start': 1, 'end': 4}, dObsIndel1)
    self.assertEqual({'start': 5, 'end': 15}, dObsIndel2)
1022 | |
def test_getUngappedPositionInRefSeq(self):
    # With gaps in the reference, the expected values equal the number of
    # non-gap reference characters up to the given 1-based alignment column
    # (columns 1, 5 and 10 give 1, 3 and 7).
    iMultifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")

    iRefBioseq = Bioseq()
    iRefBioseq.sequence = "A--TTACC-GAA"
    iRefBioseq.header = "reference"

    iLinesBioseqDB = BioseqDB()
    iLinesBioseqDB.setData([Bioseq("line1", "AACTTTCCAGAA"),
                            Bioseq("line2", "AACTTACC-GAA")])

    iMultifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(iRefBioseq, iLinesBioseqDB, iMultifasta2SNPFile._logFile, self._inFileName)

    # All three positions are computed before any assertion runs.
    lObsUngappedPositions = [iMultifasta2SNPFile.getUngappedPositionInRefSeq(position)
                             for position in (1, 5, 10)]

    self.assertEqual(1, lObsUngappedPositions[0])
    self.assertEqual(3, lObsUngappedPositions[1])
    self.assertEqual(7, lObsUngappedPositions[2])
1046 | |
def test_getUngappedPositionInRefSeq_no_gap(self):
    # Without any gap in the reference, each position maps to itself.
    iMultifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")

    iRefBioseq = Bioseq()
    iRefBioseq.sequence = "AACTTACCAGAA"
    iRefBioseq.header = "reference"

    iLinesBioseqDB = BioseqDB()
    iLinesBioseqDB.setData([Bioseq("line1", "AACTTTCCAGAA"),
                            Bioseq("line2", "AACTTACC-GAA")])

    iMultifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(iRefBioseq, iLinesBioseqDB, iMultifasta2SNPFile._logFile, self._inFileName)

    # All three positions are computed before any assertion runs.
    lObsUngappedPositions = [iMultifasta2SNPFile.getUngappedPositionInRefSeq(position)
                             for position in (1, 5, 10)]

    self.assertEqual(1, lObsUngappedPositions[0])
    self.assertEqual(5, lObsUngappedPositions[1])
    self.assertEqual(10, lObsUngappedPositions[2])
1070 | |
def test_checkAllSeq_sequences_with_different_sizes_one_seq_longer(self):
    """Building the wrapper must fail when a line is longer than the reference.

    "line2" has 16 characters while the reference has 12. The test expects
    the ReferenceBioseqAndLinesBioseqDBWrapper constructor to raise an
    exception whose message names the input file, the offending sequence
    and both lengths.
    """
    multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    refBioseq = Bioseq()
    alignedBioseqDB = BioseqDB()
    refBioseq.sequence = "AACTTACCAGAA"
    refBioseq.header = "reference"
    bs1 = Bioseq("line1", "AACTTTCCAGAA")
    bs2 = Bioseq("line2", "AACTTACC-GAATTTC")

    alignedBioseqDB.setData([bs1, bs2])

    try:
        multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(
            refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName)
    except Exception as e:
        obsMsg = e.message
    else:
        # Without this branch the test would pass when nothing is raised.
        self.fail("No exception raised for a sequence longer than the reference")

    expMsg = "File: " + self._inFileName + ", problem with the sequence " + bs2.header + ": its length is different from the reference seq! All the sequences must have the same length.\n"
    expMsg += "refseq length: " + str(len(refBioseq.sequence)) + "\n"
    expMsg += "seq length: " + str(len(bs2.sequence)) + "\n"
    self.assertEqual(expMsg, obsMsg)
1091 | |
def test_checkAllSeq_sequences_with_different_sizes_one_seq_shorter(self):
    """Building the wrapper must fail when a line is shorter than the reference.

    "line2" has 8 characters while the reference has 12. The test expects
    the ReferenceBioseqAndLinesBioseqDBWrapper constructor to raise an
    exception whose message names the input file, the offending sequence
    and both lengths.
    """
    multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse")
    refBioseq = Bioseq()
    alignedBioseqDB = BioseqDB()
    refBioseq.sequence = "AACTTACCAGAA"
    refBioseq.header = "reference"
    bs1 = Bioseq("line1", "AACTTTCCAGAA")
    bs2 = Bioseq("line2", "AACTTACC")

    alignedBioseqDB.setData([bs1, bs2])

    try:
        multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(
            refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName)
    except Exception as e:
        obsMsg = e.message
    else:
        # Without this branch the test would pass when nothing is raised.
        self.fail("No exception raised for a sequence shorter than the reference")

    expMsg = "File: " + self._inFileName + ", problem with the sequence " + bs2.header + ": its length is different from the reference seq! All the sequences must have the same length.\n"
    expMsg += "refseq length: " + str(len(refBioseq.sequence)) + "\n"
    expMsg += "seq length: " + str(len(bs2.sequence)) + "\n"
    self.assertEqual(expMsg, obsMsg)
1112 | |
1113 | |
def test_getFlanksOfASubSNP(self):
    """Check the two flanks returned for "line1" at position 3, length 3.

    The fourth argument is 7 (presumably the maximum flank length; confirm
    against getFlanksOfASubSNP). Expected flanks are "AA" and "TCCAGAA".
    """
    reference = Bioseq()
    lineDB = BioseqDB()
    reference.sequence = "AACTTACCAGAA"
    reference.header = "reference"
    lineDB.setData([Bioseq("line1", "AACTTTCCAGAA"), Bioseq("line2", "AACTTACC-GAA")])

    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(
        reference, lineDB, converter._logFile, self._inFileName)

    # Arguments: line name, sub-SNP position, polymorphism length, 7.
    obs5flank, obs3flank = converter.getFlanksOfASubSNP("line1", 3, 3, 7)

    self.assertEqual("AA", obs5flank)
    self.assertEqual("TCCAGAA", obs3flank)
1133 | |
def test_getFlanksOfASubSNP_flank_truncated(self):
    """Check the flanks when the fourth argument (500) exceeds the sequence length.

    Same data as the position-3/length-3 case on "line1"; the expected
    flanks are still "AA" and "TCCAGAA".
    """
    reference = Bioseq()
    lineDB = BioseqDB()
    reference.sequence = "AACTTACCAGAA"
    reference.header = "reference"
    lineDB.setData([Bioseq("line1", "AACTTTCCAGAA"), Bioseq("line2", "AACTTACC-GAA")])

    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(
        reference, lineDB, converter._logFile, self._inFileName)

    # Arguments: line name, sub-SNP position, polymorphism length, 500.
    obs5flank, obs3flank = converter.getFlanksOfASubSNP("line1", 3, 3, 500)

    self.assertEqual("AA", obs5flank)
    self.assertEqual("TCCAGAA", obs3flank)
1153 | |
def test_getFlanksOfASubSNP_empty_seq(self):
    """Check that both flanks are empty when every sequence is the empty string."""
    reference = Bioseq()
    lineDB = BioseqDB()
    reference.sequence = ""
    reference.header = "reference"
    lineDB.setData([Bioseq("line1", ""), Bioseq("line2", "")])

    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(
        reference, lineDB, converter._logFile, self._inFileName)

    # Arguments: line name, sub-SNP position, polymorphism length, 500.
    obs5flank, obs3flank = converter.getFlanksOfASubSNP("line1", 3, 3, 500)

    self.assertEqual("", obs5flank)
    self.assertEqual("", obs3flank)
1173 | |
def test_getFlanksOfASubSNP_flank_of_first_base(self):
    """Check the flanks of a one-base polymorphism at position 1 of "line1".

    The first expected flank is empty; the second is "ACTTTCCAGAA".
    """
    reference = Bioseq()
    lineDB = BioseqDB()
    reference.sequence = "AACTTACCAGAA"
    reference.header = "reference"
    lineDB.setData([Bioseq("line1", "AACTTTCCAGAA"), Bioseq("line2", "AACTTACC-GAA")])

    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(
        reference, lineDB, converter._logFile, self._inFileName)

    # Arguments: line name, sub-SNP position, polymorphism length, 500.
    obs5flank, obs3flank = converter.getFlanksOfASubSNP("line1", 1, 1, 500)

    self.assertEqual("", obs5flank)
    self.assertEqual("ACTTTCCAGAA", obs3flank)
1193 | |
def test_getFlanksOfASubSNP_flank_of_first_base_with_polym_on_all_sequence(self):
    """Check that both flanks are empty when the polymorphism spans the whole line.

    Position 1 with length 12 covers all twelve characters of "line1".
    """
    reference = Bioseq()
    lineDB = BioseqDB()
    reference.sequence = "AACTTACCAGAA"
    reference.header = "reference"
    lineDB.setData([Bioseq("line1", "AACTTTCCAGAA"), Bioseq("line2", "AACTTACC-GAA")])

    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(
        reference, lineDB, converter._logFile, self._inFileName)

    # Arguments: line name, sub-SNP position, polymorphism length, 500.
    obs5flank, obs3flank = converter.getFlanksOfASubSNP("line1", 1, 12, 500)

    self.assertEqual("", obs5flank)
    self.assertEqual("", obs3flank)
1212 | |
def test_getFlanksOfASubSNP_flank_of_last_base_with_polym_on_all_sequence(self):
    """Check the flanks of a one-base polymorphism at the last position (12).

    The first expected flank is "AACTTTCCAGA"; the second is empty.
    NOTE(review): despite the method name, the polymorphism length used
    here is 1, not the whole sequence.
    """
    reference = Bioseq()
    lineDB = BioseqDB()
    reference.sequence = "AACTTACCAGAA"
    reference.header = "reference"
    lineDB.setData([Bioseq("line1", "AACTTTCCAGAA"), Bioseq("line2", "AACTTACC-GAA")])

    converter = Multifasta2SNPFile("batch1", "gene1", "mouse")
    converter._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(
        reference, lineDB, converter._logFile, self._inFileName)

    # Arguments: line name, sub-SNP position, polymorphism length, 500.
    obs5flank, obs3flank = converter.getFlanksOfASubSNP("line1", 12, 1, 500)

    self.assertEqual("AACTTTCCAGA", obs5flank)
    self.assertEqual("", obs3flank)
1231 # | |
def test_subSNPExistsInSubSNPList_subSNP_exists(self):
    """subSNPExistsInSubSNPList() must return True for a sub-SNP present in the list.

    The searched dictionary is equal to the second entry of lSubSNP.
    """
    batchName = "batch1"
    lSubSNP = [{'subSNPName': batchName + "_DEL_1_line2", 'position': 1, 'lineName': 2, 'allele': 3, '5flank': "", '3flank': "CCGAA", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4},
               {'subSNPName': batchName + "_DEL_1_line1", 'position': 1, 'lineName': 1, 'allele': 2, '5flank': "", '3flank': "CCGAA",'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4},
               {'subSNPName': batchName + "_SNP_8_line3", 'position': 8, 'lineName': 3, 'allele': 1, '5flank': "ATTACCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1},
               {'subSNPName': batchName + "_SNP_8_line1", 'position': 8, 'lineName': 1, 'allele': 6, '5flank': "A--ACCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1},
               {'subSNPName': batchName + "_SNP_8_line2", 'position': 8, 'lineName': 2, 'allele': 6, '5flank': "---ACCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1},
               {'subSNPName': batchName + "_SNP_8_line4", 'position': 8, 'lineName': 4, 'allele': 6, '5flank': "----CCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1},
               {'subSNPName': batchName + "_DEL_1_line4", 'position': 1, 'lineName': 4, 'allele': 4, '5flank': "", '3flank': "CCGAA",'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4},
               {'subSNPName': batchName + "_DEL_1_line3", 'position': 1, 'lineName': 3, 'allele': 5, '5flank': "", '3flank': "CCGGA",'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4}]
    multifasta2SNPFile = Multifasta2SNPFile(batchName, "gene1", "mouse")

    dSearchedSubSNP = {'subSNPName': batchName + "_DEL_1_line1", 'position': 1, 'lineName': 1, 'allele': 2, '5flank': "", '3flank': "CCGAA",'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4}

    # "exp" is the fixed expectation and "obs" the value under test, so a
    # failure report labels the two values correctly.
    expResult = True
    obsResult = multifasta2SNPFile.subSNPExistsInSubSNPList(dSearchedSubSNP, lSubSNP)

    self.assertEqual(expResult, obsResult)
1250 | |
def test_subSNPExistsInSubSNPList_subSNP_does_not_exist(self):
    """subSNPExistsInSubSNPList() must return False for a sub-SNP absent from the list.

    The searched dictionary uses the name "batch1_DEL_12_line1" and
    position 12, which no entry of lSubSNP has.
    """
    batchName = "batch1"
    lSubSNP = [{'subSNPName': batchName + "_DEL_1_line2", 'position': 1, 'lineName': 2, 'allele': 3, '5flank': "", '3flank': "CCGAA", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4},
               {'subSNPName': batchName + "_DEL_1_line1", 'position': 1, 'lineName': 1, 'allele': 2, '5flank': "", '3flank': "CCGAA",'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4},
               {'subSNPName': batchName + "_SNP_8_line3", 'position': 8, 'lineName': 3, 'allele': 1, '5flank': "ATTACCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1},
               {'subSNPName': batchName + "_SNP_8_line1", 'position': 8, 'lineName': 1, 'allele': 6, '5flank': "A--ACCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1},
               {'subSNPName': batchName + "_SNP_8_line2", 'position': 8, 'lineName': 2, 'allele': 6, '5flank': "---ACCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1},
               {'subSNPName': batchName + "_SNP_8_line4", 'position': 8, 'lineName': 4, 'allele': 6, '5flank': "----CCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1},
               {'subSNPName': batchName + "_DEL_1_line4", 'position': 1, 'lineName': 4, 'allele': 4, '5flank': "", '3flank': "CCGAA",'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4},
               {'subSNPName': batchName + "_DEL_1_line3", 'position': 1, 'lineName': 3, 'allele': 5, '5flank': "", '3flank': "CCGGA",'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4}]
    multifasta2SNPFile = Multifasta2SNPFile(batchName, "gene1", "mouse")

    dSearchedSubSNP = {'subSNPName': batchName + "_DEL_12_line1", 'position': 12, 'lineName': 1, 'allele': 2, '5flank': "", '3flank': "CCGAA",'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4}

    # "exp" is the fixed expectation and "obs" the value under test, so a
    # failure report labels the two values correctly.
    expResult = False
    obsResult = multifasta2SNPFile.subSNPExistsInSubSNPList(dSearchedSubSNP, lSubSNP)

    self.assertEqual(expResult, obsResult)
1269 | |
def _writeExpSubSNPFile(self):
    """Write the expected SubSNP CSV to self._expSubSNPFileName.

    The file holds the column header followed by ten "Batch1" SNP rows
    (positions 4, 21, 25, 36 and 51 for Line1 and Line2).
    """
    lines = [
        "SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n",
        "Batch1_SNP_4_Line1;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;1\n",
        "Batch1_SNP_4_Line2;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n",
        "Batch1_SNP_21_Line1;A;SNP;21;CCTTAGCCATTGCTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;2\n",
        "Batch1_SNP_21_Line2;A;SNP;21;CCTAAGCCATTGCTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_25_Line1;A;SNP;25;CCTTAGCCATTGCTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch1_SNP_25_Line2;A;SNP;25;CCTAAGCCATTGCTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_36_Line1;A;SNP;36;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch1_SNP_36_Line2;A;SNP;36;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_51_Line1;A;SNP;51;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;1;1;Sequence;;;2\n",
        "Batch1_SNP_51_Line2;A;SNP;51;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;1;2;Sequence;;;4\n",
    ]
    # The context manager closes the handle even if a write fails.
    with open(self._expSubSNPFileName, "w") as expFileHandle:
        expFileHandle.writelines(lines)
1284 | |
def _writeExpSubSNPFileWithSnpsAndIndels(self):
    """Write the expected SubSNP CSV mixing SNPs and indels to self._expSubSNPFileName.

    The file holds the column header followed by fourteen "Batch1" rows:
    one INSERTION, one DELETION and five SNP positions, each for Line1 and
    Line2.
    """
    lines = [
        "SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n",
        "Batch1_INS_1_Line1;A;INSERTION;1;C;TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;2;1;1;Sequence;;;8\n",
        "Batch1_INS_1_Line2;A;INSERTION;1;C;AAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;2;1;2;Sequence;;;6\n",
        "Batch1_SNP_2_Line1;A;SNP;2;C--;AGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;1\n",
        "Batch1_SNP_2_Line2;A;SNP;2;CCT;AGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n",
        "Batch1_DEL_8_Line1;A;DELETION;8;C--TAGCCA;CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;3;1;1;Sequence;;;5\n",
        "Batch1_DEL_8_Line2;A;DELETION;8;CCTAAGCCA;CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;3;1;2;Sequence;;;7\n",
        "Batch1_SNP_19_Line1;A;SNP;19;C--TAGCCA---CTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;2\n",
        "Batch1_SNP_19_Line2;A;SNP;19;CCTAAGCCATT-CTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_23_Line1;A;SNP;23;C--TAGCCA---CTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch1_SNP_23_Line2;A;SNP;23;CCTAAGCCATT-CTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_34_Line1;A;SNP;34;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch1_SNP_34_Line2;A;SNP;34;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_49_Line1;A;SNP;49;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;1;1;Sequence;;;2\n",
        "Batch1_SNP_49_Line2;A;SNP;49;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;1;2;Sequence;;;4\n",
    ]
    # The context manager closes the handle even if a write fails.
    with open(self._expSubSNPFileName, "w") as expFileHandle:
        expFileHandle.writelines(lines)
1303 | |
def _writeExpSubSNPFileSeveralBatches(self):
    """Write the expected SubSNP CSV for two batches to self._expSubSNPFileName.

    After the column header come ten "Batch_Gene1" SNP rows (BatchNumber 1)
    and fourteen "Batch_Gene2" SNP/indel rows (BatchNumber 2); both batches
    use Line1 and Line2.
    """
    lines = [
        "SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n",
        # First batch: Batch_Gene1
        "Batch_Gene1_SNP_4_Line1;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;1\n",
        "Batch_Gene1_SNP_4_Line2;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n",
        "Batch_Gene1_SNP_21_Line1;A;SNP;21;CCTTAGCCATTGCTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;2\n",
        "Batch_Gene1_SNP_21_Line2;A;SNP;21;CCTAAGCCATTGCTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch_Gene1_SNP_25_Line1;A;SNP;25;CCTTAGCCATTGCTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch_Gene1_SNP_25_Line2;A;SNP;25;CCTAAGCCATTGCTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch_Gene1_SNP_36_Line1;A;SNP;36;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch_Gene1_SNP_36_Line2;A;SNP;36;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch_Gene1_SNP_51_Line1;A;SNP;51;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;1;1;Sequence;;;2\n",
        "Batch_Gene1_SNP_51_Line2;A;SNP;51;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;1;2;Sequence;;;4\n",
        # Second batch: Batch_Gene2
        "Batch_Gene2_INS_1_Line1;A;INSERTION;1;C;TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;2;2;1;Sequence;;;8\n",
        "Batch_Gene2_INS_1_Line2;A;INSERTION;1;C;AAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;2;2;2;Sequence;;;6\n",
        "Batch_Gene2_SNP_2_Line1;A;SNP;2;C--;AGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;2;1;Sequence;;;1\n",
        "Batch_Gene2_SNP_2_Line2;A;SNP;2;CCT;AGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;2;2;Sequence;;;4\n",
        "Batch_Gene2_DEL_8_Line1;A;DELETION;8;C--TAGCCA;CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;3;2;1;Sequence;;;5\n",
        "Batch_Gene2_DEL_8_Line2;A;DELETION;8;CCTAAGCCA;CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;3;2;2;Sequence;;;7\n",
        "Batch_Gene2_SNP_19_Line1;A;SNP;19;C--TAGCCA---CTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;2;1;Sequence;;;2\n",
        "Batch_Gene2_SNP_19_Line2;A;SNP;19;CCTAAGCCATT-CTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;2;2;Sequence;;;2\n",
        "Batch_Gene2_SNP_23_Line1;A;SNP;23;C--TAGCCA---CTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;2;1;Sequence;;;3\n",
        "Batch_Gene2_SNP_23_Line2;A;SNP;23;CCTAAGCCATT-CTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;2;2;Sequence;;;2\n",
        "Batch_Gene2_SNP_34_Line1;A;SNP;34;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;2;1;Sequence;;;3\n",
        "Batch_Gene2_SNP_34_Line2;A;SNP;34;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;2;2;Sequence;;;2\n",
        "Batch_Gene2_SNP_49_Line1;A;SNP;49;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;2;1;Sequence;;;2\n",
        "Batch_Gene2_SNP_49_Line2;A;SNP;49;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;2;2;Sequence;;;4\n",
    ]
    # The context manager closes the handle even if a write fails.
    with open(self._expSubSNPFileName, "w") as expFileHandle:
        expFileHandle.writelines(lines)
1333 | |
def _writeExpSubSNPFileSeveralBatches_different_lines_between_files(self):
    """Write the expected two-batch SubSNP CSV where each batch uses its own lines.

    After the column header come ten "Batch_Gene1" SNP rows for Line1/Line2
    (individuals 1 and 2) and fourteen "Batch_Gene2" SNP/indel rows for
    Line3/Line4 (individuals 3 and 4). Output goes to
    self._expSubSNPFileName.
    """
    lines = [
        "SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n",
        # First batch: Batch_Gene1, Line1 and Line2
        "Batch_Gene1_SNP_4_Line1;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;1\n",
        "Batch_Gene1_SNP_4_Line2;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n",
        "Batch_Gene1_SNP_21_Line1;A;SNP;21;CCTTAGCCATTGCTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;2\n",
        "Batch_Gene1_SNP_21_Line2;A;SNP;21;CCTAAGCCATTGCTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch_Gene1_SNP_25_Line1;A;SNP;25;CCTTAGCCATTGCTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch_Gene1_SNP_25_Line2;A;SNP;25;CCTAAGCCATTGCTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch_Gene1_SNP_36_Line1;A;SNP;36;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch_Gene1_SNP_36_Line2;A;SNP;36;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch_Gene1_SNP_51_Line1;A;SNP;51;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;1;1;Sequence;;;2\n",
        "Batch_Gene1_SNP_51_Line2;A;SNP;51;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;1;2;Sequence;;;4\n",
        # Second batch: Batch_Gene2, Line3 and Line4
        "Batch_Gene2_INS_1_Line3;A;INSERTION;1;C;TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;2;2;3;Sequence;;;8\n",
        "Batch_Gene2_INS_1_Line4;A;INSERTION;1;C;AAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;2;2;4;Sequence;;;6\n",
        "Batch_Gene2_SNP_2_Line3;A;SNP;2;C--;AGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;2;3;Sequence;;;1\n",
        "Batch_Gene2_SNP_2_Line4;A;SNP;2;CCT;AGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;2;4;Sequence;;;4\n",
        "Batch_Gene2_DEL_8_Line3;A;DELETION;8;C--TAGCCA;CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;3;2;3;Sequence;;;5\n",
        "Batch_Gene2_DEL_8_Line4;A;DELETION;8;CCTAAGCCA;CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;3;2;4;Sequence;;;7\n",
        "Batch_Gene2_SNP_19_Line3;A;SNP;19;C--TAGCCA---CTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;2;3;Sequence;;;2\n",
        "Batch_Gene2_SNP_19_Line4;A;SNP;19;CCTAAGCCATT-CTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;2;4;Sequence;;;2\n",
        "Batch_Gene2_SNP_23_Line3;A;SNP;23;C--TAGCCA---CTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;2;3;Sequence;;;3\n",
        "Batch_Gene2_SNP_23_Line4;A;SNP;23;CCTAAGCCATT-CTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;2;4;Sequence;;;2\n",
        "Batch_Gene2_SNP_34_Line3;A;SNP;34;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;2;3;Sequence;;;3\n",
        "Batch_Gene2_SNP_34_Line4;A;SNP;34;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;2;4;Sequence;;;2\n",
        "Batch_Gene2_SNP_49_Line3;A;SNP;49;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;2;3;Sequence;;;2\n",
        "Batch_Gene2_SNP_49_Line4;A;SNP;49;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;2;4;Sequence;;;4\n",
    ]
    # The context manager closes the handle even if a write fails.
    with open(self._expSubSNPFileName, "w") as expFileHandle:
        expFileHandle.writelines(lines)
1363 | |
def _writeExpSubSNPFileSeveralLineSeq(self):
    """Write the expected SubSNP CSV for the longer sequences to self._expSubSNPFileName.

    The file holds the column header followed by ten "Batch1" SNP rows
    whose 3' flanks extend to the end of the longer sequences.
    """
    lines = [
        "SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n",
        "Batch1_SNP_4_Line1;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA;1;1;1;Sequence;;;1\n",
        "Batch1_SNP_4_Line2;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATACGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n",
        "Batch1_SNP_21_Line1;A;SNP;21;CCTTAGCCATTGCTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA;1;1;1;Sequence;;;2\n",
        "Batch1_SNP_21_Line2;A;SNP;21;CCTAAGCCATTGCTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATACGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_25_Line1;A;SNP;25;CCTTAGCCATTGCTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA;1;1;1;Sequence;;;3\n",
        "Batch1_SNP_25_Line2;A;SNP;25;CCTAAGCCATTGCTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATACGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_36_Line1;A;SNP;36;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA;1;1;1;Sequence;;;3\n",
        "Batch1_SNP_36_Line2;A;SNP;36;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATACGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_51_Line1;A;SNP;51;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;CGCAGTAGCCAAACCTCCACAATA;1;1;1;Sequence;;;2\n",
        "Batch1_SNP_51_Line2;A;SNP;51;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;CGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n",
    ]
    # The context manager closes the handle even if a write fails.
    with open(self._expSubSNPFileName, "w") as expFileHandle:
        expFileHandle.writelines(lines)
1378 | |
1379 | |
def _writeExpAlleleFile(self):
    """Write the expected allele CSV (header plus alleles 1-4: T, C, G, A) to self._expAlleleFileName."""
    lines = [
        "AlleleNumber;Value;Motif;NbCopy;Comment\n",
        "1;T;;;\n",
        "2;C;;;\n",
        "3;G;;;\n",
        "4;A;;;\n",
    ]
    # The context manager closes the handle even if a write fails.
    with open(self._expAlleleFileName, "w") as expFileHandle:
        expFileHandle.writelines(lines)
1388 | |
def _writeExpAlleleFileWithSnpsAndIndels(self):
    """Write the expected allele CSV including indel alleles to self._expAlleleFileName.

    The file holds the header, the four single-base alleles (1-4) and four
    multi-character alleles (5-8: "---", "CT", "TT-", "--").
    """
    lines = [
        "AlleleNumber;Value;Motif;NbCopy;Comment\n",
        "1;T;;;\n",
        "2;C;;;\n",
        "3;G;;;\n",
        "4;A;;;\n",
        "5;---;;;\n",
        "6;CT;;;\n",
        "7;TT-;;;\n",
        "8;--;;;\n",
    ]
    # The context manager closes the handle even if a write fails.
    with open(self._expAlleleFileName, "w") as expFileHandle:
        expFileHandle.writelines(lines)
1401 | |
1402 | |
def _writeExpAlleleFileSeveralBatches(self):
    """Write the expected allele CSV for the several-batches scenario to self._expAlleleFileName.

    The file holds the header, the four single-base alleles (1-4) and four
    multi-character alleles (5-8: "---", "CT", "TT-", "--").
    """
    lines = [
        "AlleleNumber;Value;Motif;NbCopy;Comment\n",
        "1;T;;;\n",
        "2;C;;;\n",
        "3;G;;;\n",
        "4;A;;;\n",
        "5;---;;;\n",
        "6;CT;;;\n",
        "7;TT-;;;\n",
        "8;--;;;\n",
    ]
    # The context manager closes the handle even if a write fails.
    with open(self._expAlleleFileName, "w") as expFileHandle:
        expFileHandle.writelines(lines)
1415 | |
def _writeExpIndividualFile(self):
    """Write the expected individual CSV (header plus Line1 and Line2) to self._expIndividualFileName."""
    lines = [
        "IndividualNumber;IndividualName;Description;AberrAneuploide;FractionLength;DeletionLineSynthesis;UrlEarImage;TypeLine;ChromNumber;ArmChrom;DeletionBin;ScientificName;local_germplasm_name;submitter_code;local_institute;donor_institute;donor_acc_id\n",
        "1;Line1;;;;;;;;;;Arabidopsis thaliana;;;;;\n",
        "2;Line2;;;;;;;;;;Arabidopsis thaliana;;;;;\n",
    ]
    # The context manager closes the handle even if a write fails.
    with open(self._expIndividualFileName, "w") as expFileHandle:
        expFileHandle.writelines(lines)
1422 | |
def _writeExpIndividualFile_different_lines_between_files(self):
    """Write the expected individual CSV (header plus Line1 to Line4) to self._expIndividualFileName."""
    lines = [
        "IndividualNumber;IndividualName;Description;AberrAneuploide;FractionLength;DeletionLineSynthesis;UrlEarImage;TypeLine;ChromNumber;ArmChrom;DeletionBin;ScientificName;local_germplasm_name;submitter_code;local_institute;donor_institute;donor_acc_id\n",
        "1;Line1;;;;;;;;;;Arabidopsis thaliana;;;;;\n",
        "2;Line2;;;;;;;;;;Arabidopsis thaliana;;;;;\n",
        "3;Line3;;;;;;;;;;Arabidopsis thaliana;;;;;\n",
        "4;Line4;;;;;;;;;;Arabidopsis thaliana;;;;;\n",
    ]
    # The context manager closes the handle even if a write fails.
    with open(self._expIndividualFileName, "w") as expFileHandle:
        expFileHandle.writelines(lines)
1431 | |
def _writeExpSequenceFile(self):
    """Write the expected reference sequence as FASTA and as CSV metadata.

    The FASTA record goes to self._expSequenceFSAFileName and the CSV
    (header plus one row) to self._expSequenceCSVFileName.
    """
    # Context managers make sure both files are flushed and closed before
    # returning; previously the handles were never closed.
    with open(self._expSequenceFSAFileName, "w") as SequenceFSAFileHandle:
        SequenceFSAFileHandle.write(">Sequence_de_Reference\n")
        SequenceFSAFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n")
    with open(self._expSequenceCSVFileName, "w") as SequenceCSVFileHandle:
        SequenceCSVFileHandle.write("SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference;Reference;;;;;Arabidopsis thaliana\n")
1439 | |
def _writeExpSequenceFileSeveralLineSeq(self):
    """Write the expected longer reference sequence as FASTA and as CSV metadata.

    The FASTA record goes to self._expSequenceFSAFileName and the CSV
    (header plus one row) to self._expSequenceCSVFileName.
    """
    # Context managers make sure both files are flushed and closed before
    # returning; previously the handles were never closed.
    with open(self._expSequenceFSAFileName, "w") as SequenceFSAFileHandle:
        SequenceFSAFileHandle.write(">Sequence_de_Reference\n")
        SequenceFSAFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA\n")
    with open(self._expSequenceCSVFileName, "w") as SequenceCSVFileHandle:
        SequenceCSVFileHandle.write("SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference;Reference;;;;;Arabidopsis thaliana\n")
1447 | |
def _writeExpSequenceFileWithDeletion(self):
    """Write the expected 49-character reference sequence as FASTA and as CSV metadata.

    The FASTA record goes to self._expSequenceFSAFileName and the CSV
    (header plus one row) to self._expSequenceCSVFileName.
    """
    # Context managers make sure both files are flushed and closed before
    # returning; previously the handles were never closed.
    with open(self._expSequenceFSAFileName, "w") as SequenceFSAFileHandle:
        SequenceFSAFileHandle.write(">Sequence_de_Reference\n")
        SequenceFSAFileHandle.write("CAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n")
    with open(self._expSequenceCSVFileName, "w") as SequenceCSVFileHandle:
        SequenceCSVFileHandle.write("SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference;Reference;;;;;Arabidopsis thaliana\n")
1455 | |
def _writeExpSequenceSeveralBatches(self):
    """Write two expected reference sequences as FASTA and as CSV metadata.

    The records "Sequence_de_Reference1" and "Sequence_de_Reference2" go to
    self._expSequenceFSAFileName; the CSV (header plus one row per
    reference) goes to self._expSequenceCSVFileName.
    """
    # Context managers make sure both files are flushed and closed before
    # returning; previously the handles were never closed.
    with open(self._expSequenceFSAFileName, "w") as SequenceFSAFileHandle:
        SequenceFSAFileHandle.write(">Sequence_de_Reference1\n")
        SequenceFSAFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n")
        SequenceFSAFileHandle.write(">Sequence_de_Reference2\n")
        SequenceFSAFileHandle.write("CAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n")
    with open(self._expSequenceCSVFileName, "w") as SequenceCSVFileHandle:
        SequenceCSVFileHandle.write("SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference1;Reference;;;;;Arabidopsis thaliana\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference2;Reference;;;;;Arabidopsis thaliana\n")
1466 | |
def _writeExpSequenceSeveralBatchesForSameRefSeq(self):
    """Write two expected records sharing one reference name, as FASTA and CSV.

    Both FASTA records are named "Sequence_de_Reference1" but carry
    different sequences; the CSV repeats the same metadata row twice.
    Output goes to self._expSequenceFSAFileName and
    self._expSequenceCSVFileName.
    """
    # Context managers make sure both files are flushed and closed before
    # returning; previously the handles were never closed.
    with open(self._expSequenceFSAFileName, "w") as SequenceFSAFileHandle:
        SequenceFSAFileHandle.write(">Sequence_de_Reference1\n")
        SequenceFSAFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n")
        SequenceFSAFileHandle.write(">Sequence_de_Reference1\n")
        SequenceFSAFileHandle.write("CAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n")
    with open(self._expSequenceCSVFileName, "w") as SequenceCSVFileHandle:
        SequenceCSVFileHandle.write("SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference1;Reference;;;;;Arabidopsis thaliana\n")
        SequenceCSVFileHandle.write("Sequence_de_Reference1;Reference;;;;;Arabidopsis thaliana\n")
1477 | |
def _writeExpBatchFile(self):
    """Create the expected batch file containing a single batch record.

    The file at ``self._expBatchFileName`` is overwritten with one block of
    ``Key: value`` lines terminated by a ``//`` line.
    """
    recordLines = (
        "BatchNumber: 1\n",
        "BatchName: Batch1\n",
        "GeneName: methyltransferase\n",
        "Description: \n",
        "ContactNumber: 1\n",
        "ProtocolNumber: 1\n",
        "ThematicNumber: 1\n",
        "RefSeqName: Sequence_de_Reference\n",
        "AlignmentFileName: \n",
        "SeqName: \n",
        "//\n",
    )
    outHandle = open(self._expBatchFileName, "w")
    outHandle.writelines(recordLines)
    outHandle.close()
1492 | |
def _writeExpBatchFileSeveralBatches(self):
    """Create the expected batch file containing two batch records.

    The file at ``self._expBatchFileName`` is overwritten with the records
    for ``Batch_Gene1`` and ``Batch_Gene2``, each terminated by ``//`` and
    each pointing at its own reference sequence name.
    """
    firstRecord = (
        "BatchNumber: 1\n",
        "BatchName: Batch_Gene1\n",
        "GeneName: Gene1\n",
        "Description: \n",
        "ContactNumber: 1\n",
        "ProtocolNumber: 1\n",
        "ThematicNumber: 1\n",
        "RefSeqName: Sequence_de_Reference1\n",
        "AlignmentFileName: \n",
        "SeqName: \n",
        "//\n",
    )
    secondRecord = (
        "BatchNumber: 2\n",
        "BatchName: Batch_Gene2\n",
        "GeneName: Gene2\n",
        "Description: \n",
        "ContactNumber: 1\n",
        "ProtocolNumber: 1\n",
        "ThematicNumber: 1\n",
        "RefSeqName: Sequence_de_Reference2\n",
        "AlignmentFileName: \n",
        "SeqName: \n",
        "//\n",
    )
    outHandle = open(self._expBatchFileName, "w")
    outHandle.writelines(firstRecord)
    outHandle.writelines(secondRecord)
    outHandle.close()
1518 | |
def _writeExpBatchFileSeveralBatchesForSameRefSeq(self):
    """Create the expected batch file for two batches sharing one reference.

    The file at ``self._expBatchFileName`` is overwritten with the records
    for ``Batch_Gene1`` and ``Batch_Gene2``; both records name
    ``Sequence_de_Reference1`` as their reference sequence. Each record is
    terminated by a ``//`` line.
    """
    # The context manager closes the file exactly once, replacing the two
    # consecutive close() calls this method used to end with.
    with open(self._expBatchFileName, "w") as batchHandle:
        batchHandle.write("BatchNumber: 1\n")
        batchHandle.write("BatchName: Batch_Gene1\n")
        batchHandle.write("GeneName: Gene1\n")
        batchHandle.write("Description: \n")
        batchHandle.write("ContactNumber: 1\n")
        batchHandle.write("ProtocolNumber: 1\n")
        batchHandle.write("ThematicNumber: 1\n")
        batchHandle.write("RefSeqName: Sequence_de_Reference1\n")
        batchHandle.write("AlignmentFileName: \n")
        batchHandle.write("SeqName: \n")
        batchHandle.write("//\n")
        batchHandle.write("BatchNumber: 2\n")
        batchHandle.write("BatchName: Batch_Gene2\n")
        batchHandle.write("GeneName: Gene2\n")
        batchHandle.write("Description: \n")
        batchHandle.write("ContactNumber: 1\n")
        batchHandle.write("ProtocolNumber: 1\n")
        batchHandle.write("ThematicNumber: 1\n")
        batchHandle.write("RefSeqName: Sequence_de_Reference1\n")
        batchHandle.write("AlignmentFileName: \n")
        batchHandle.write("SeqName: \n")
        batchHandle.write("//\n")
1547 | |
def _writeExpBatchLineFile(self):
    """Create the expected batch-line CSV for two individuals in batch 1.

    The file at ``self._expBatchLineFileName`` is overwritten with the
    header row and one row per individual number (1 and 2).
    """
    rows = (
        "IndividualNumber;Pos5;Pos3;BatchNumber;Sequence\n",
        "1;;;1;\n",
        "2;;;1;\n",
    )
    outHandle = open(self._expBatchLineFileName, "w")
    outHandle.writelines(rows)
    outHandle.close()
1554 | |
1555 | |
def _writeExpBatchLineFileSeveralBatches(self):
    """Create the expected batch-line CSV for two batches with the same individuals.

    The file at ``self._expBatchLineFileName`` is overwritten with the
    header row, then individuals 1 and 2 for batch 1 and again for batch 2.
    """
    rows = (
        "IndividualNumber;Pos5;Pos3;BatchNumber;Sequence\n",
        "1;;;1;\n",
        "2;;;1;\n",
        "1;;;2;\n",
        "2;;;2;\n",
    )
    outHandle = open(self._expBatchLineFileName, "w")
    outHandle.writelines(rows)
    outHandle.close()
1564 | |
def _writeExpBatchLineFileSeveralBatches_different_lines_between_files(self):
    """Create the expected batch-line CSV for two batches with distinct individuals.

    The file at ``self._expBatchLineFileName`` is overwritten with the
    header row, then individuals 1 and 2 for batch 1 and individuals 3 and
    4 for batch 2.
    """
    rows = (
        "IndividualNumber;Pos5;Pos3;BatchNumber;Sequence\n",
        "1;;;1;\n",
        "2;;;1;\n",
        "3;;;2;\n",
        "4;;;2;\n",
    )
    outHandle = open(self._expBatchLineFileName, "w")
    outHandle.writelines(rows)
    outHandle.close()
1573 | |
def _writeInputFile(self):
    """Create the multi-FASTA input file with one reference and two lines.

    The file at ``self._inFileName`` is overwritten with the records
    ``Sequence_de_Reference``, ``Line1`` and ``Line2``. The last sequence
    is written without a trailing newline.
    """
    fastaChunks = (
        ">Sequence_de_Reference\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    outHandle = open(self._inFileName, "w")
    outHandle.writelines(fastaChunks)
    outHandle.close()
1583 | |
def _writeInputFileWithSnpsAndIndels(self):
    """Create the multi-FASTA input file whose sequences contain gap characters.

    The file at ``self._inFileName`` is overwritten with the records
    ``Sequence_de_Reference``, ``Line1`` and ``Line2``; the sequences
    include ``-`` runs. The last sequence is written without a trailing
    newline.
    """
    fastaChunks = (
        ">Sequence_de_Reference\n",
        "C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    outHandle = open(self._inFileName, "w")
    outHandle.writelines(fastaChunks)
    outHandle.close()
1593 | |
def _writeInputFileWithSeqErrorsInRefSeq(self):
    """Create the multi-FASTA input file with an invalid symbol in the reference.

    The file at ``self._inFileName`` is overwritten with the records
    ``Sequence_de_Reference``, ``Line1`` and ``Line2``; the reference
    sequence contains the character ``7``. The last sequence is written
    without a trailing newline.
    """
    fastaChunks = (
        ">Sequence_de_Reference\n",
        "CCTA7GCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    outHandle = open(self._inFileName, "w")
    outHandle.writelines(fastaChunks)
    outHandle.close()
1603 | |
def _writeInputFileWithSeqErrorsInOneLineSeq(self):
    """Create the multi-FASTA input file with an invalid symbol in ``Line1``.

    The file at ``self._inFileName`` is overwritten with the records
    ``Sequence_de_Reference``, ``Line1`` and ``Line2``; the ``Line1``
    sequence contains the character ``X``. The last sequence is written
    without a trailing newline.
    """
    fastaChunks = (
        ">Sequence_de_Reference\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATXAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    outHandle = open(self._inFileName, "w")
    outHandle.writelines(fastaChunks)
    outHandle.close()
1613 | |
def _writeInputFileWithASeveralLineSeq(self):
    """Create the multi-FASTA input file where ``Line2`` spans several lines.

    The file at ``self._inFileName`` is overwritten with the records
    ``Sequence_de_Reference`` and ``Line1`` (one sequence line each) and
    ``Line2``, whose sequence is split over three physical lines.
    """
    fastaChunks = (
        ">Sequence_de_Reference\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA\nCGCAGTAGCCAAA\nCCTCCACAATA\n",
    )
    outHandle = open(self._inFileName, "w")
    outHandle.writelines(fastaChunks)
    outHandle.close()
1623 | |
1624 | |
def _writeInputFileWithUpcaseAndLowcaseNucleotide(self):
    """Create the multi-FASTA input file mixing upper- and lower-case letters.

    The file at ``self._inFileName`` is overwritten with the records
    ``Sequence_de_Reference``, ``Line1`` and ``Line2``. Every sequence is
    split over three physical lines and contains a few lower-case
    nucleotides.
    """
    fastaChunks = (
        ">Sequence_de_Reference\n",
        "CCTAAGCCATTGCTTGGtGATTATGAAGgCAGTAGTCAAACCTCCACAATC\nCGCAGTAGCCAAA\nCCTCCACAATA\n",
        ">Line1\n",
        "CCTTAGCCATTGCtTGGTGACTATGAAGGcAGTAGGCAAACCTCCACAATC\nCGCAGTAGCCAAA\nCCTCCACAATA\n",
        ">Line2\n",
        "CCTAAGCCAtTGCTTGGTGACTATCaAGGCAGTAGCCAAACCTCCACAATA\nCGCAGTAGCCAAA\nCCTCCACAATA\n",
    )
    outHandle = open(self._inFileName, "w")
    outHandle.writelines(fastaChunks)
    outHandle.close()
1634 | |
def _writeInputFileWith2SeqsWithTheSameName(self):
    """Create the multi-FASTA input file in which ``Line2`` appears twice.

    The file at ``self._inFileName`` is overwritten with the records
    ``Sequence_de_Reference`` (sequence split over three physical lines),
    ``Line1``, and two identical ``Line2`` records.
    """
    fastaChunks = (
        ">Sequence_de_Reference\n",
        "CCTAAGCCATTGCTTGGtGATTATGAAGgCAGTAGTCAAACCTCCACAATC\nCGCAGTAGCCAAA\nCCTCCACAATA\n",
        ">Line1\n",
        "CCTTAGCCATTGCtTGGTGACTATGAAGGcAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCAtTGCTTGGTGACTATCaAGGCAGTAGCCAAACCTCCACAATA\n",
        ">Line2\n",
        "CCTAAGCCAtTGCTTGGTGACTATCaAGGCAGTAGCCAAACCTCCACAATA\n",
    )
    outHandle = open(self._inFileName, "w")
    outHandle.writelines(fastaChunks)
    outHandle.close()
1646 | |
1647 def _writeInputFileBatchWithPotentialDooblons(self): | |
1648 inFileHandle = open(self._inFileName, "w") | |
1649 inFileHandle.write(">AU247387ref\n") | |
1650 inFileHandle.write("CACTATAGCTCCTAACATTCCTGAAGTGAAGATCACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTACGTTCTGGTTACTCTTCAATTTGGGCATGCTTAATTATCTCCTCAATTTCAATTTGGCCATGCTTAATGTTGGGTGCTTTCTTTATAGCCTGCTCACCAACATGTGATCTGTTCTTTGTATGCTCAGGTGGTTGCATGGCTTCGTTCTCTTTAGCCTTCGCTGTTTGTGGCTTTGTTATGTGACCAAGCACTTGCTATACTGTCTATTTGTTCGCAGGTGATTGCAGGTCTGTGGATCCTCTGGGTTCTTTCTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTGTGCTGCAACAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCATACGAGTTGTGAACTGATGACATCCTCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTACGAGAAGTACGAGGACAAGATCGATGCTTTTGGAGAGAAG\n") | |
1651 inFileHandle.write(">10102\n") | |
1652 inFileHandle.write("NNNtatagctcctaacattcctgaagtgaagatcacrgaggacnnggctgtcaatgttgcccgctcgctgagatatgagatcaacaggggcttygctagcttgagggcgattggNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") | |
1653 inFileHandle.write(">10954\n") | |
1654 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtcaatgttgcccgctcgctgagatatgagatcaacaggggctttgctagcctgagggcgattggtcaaggccgtgacctgaagaaattcctgattgtacgt---------------------------ttaat---------------------------------------------------------------------------------------------tggttgcatggcttcgttctctttagccttcgctgtttgtggctttgttatgtgaccaagcacttgctatactgtctatttgttcgcaggtgattgcaggtctgtggatcctct---------ctgcccttgggagctgctgcaatttcctcaccttgttctacataggtaatgtgcttcgctgctacagcctgaacttg--------cagatgtgcagtaactgtacctagcattgtttacccat------------------------tctcgctttcttacNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") | |
1655 inFileHandle.write(">ABERAVON\n") | |
1656 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNggtcaaggccgtgacctgaagaaattcctgattgtacgt---------------------------ttaat---------------------------------------------------------------------------------------------tggttgcatggcttcgttctctttagccttcgctgtttgtggctttgttatgtgaccNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") | |
1657 inFileHandle.write(">CARILLON\n") | |
1658 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNcaacattgcccgctcgctgagatatgagatcaacaggggcttctttactttgaaggagatcggtcagggccgtgatctgaagaaattcctcattgtatgttctggttactcttcaatttgggcatgcttaat---------------------------------gttgggtgctttctttat--cctgctcaccaacatgtgatctgttctttgtatgctcaggtggttgccgg---------------------------------------------------------------------------------------------------cctctgggttctttctgttcttgggagctcttgcaacttcttgacattggcatatataggtaat------------------tttaacttgtgctgcaacacttgagttcataaccaccctag------ttgtccatacgagttgtgaactgatgacatccgttctttttcccragtgcagtcttcgtggtgctctacacggtgccagttctgtatgaNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") | |
1659 inFileHandle.write(">CONCERTO\n") | |
1660 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNctttgttatgtgaccaagcacttgctatactgtctatttgttcgcaggtgattgcaggtctgtggatcctct---------ctgcccttgggagctgctgcaatttcctcaccttgttctacataggtaatgtgcttcgctgctacagcctgaacttg--------cagatgtgcagtaactgtacctagcattgtttacccat------------------------tctcgctttcttacttgcagtcttcatggttctctacactgtgccNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") | |
1661 inFileHandle.write(">F14-13\n") | |
1662 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtsaatgttgcccgctcgctgagatatgagatcaacaggggctttgctagcctgagggcgattggtcaaggccgtgacctgaagaaaNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") | |
1663 inFileHandle.write(">GAGNY\n") | |
1664 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNcattgcccgctcgctgagatatgagatcaacaggggcttctttactttgaaggagatyggtcagggccgtgayctgaagaaattcctsattgtaygtNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") | |
1665 inFileHandle.write(">GREECE\n") | |
1666 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtsaacattgcccgctcgctgagatatgagatcaacaggggcttctttactttgaaggagatyggycagggccgtgatctgaagaaattcctcattgtatgtNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") | |
1667 inFileHandle.write(">IMAGINE\n") | |
1668 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtcaatgttgcccgctcgctgagatatgagatcaacaggggctttgctagcctgagggcgattggtcaaggccgtgacctgaagaaattcctgattgtacgt---------------------------ttaat---------------------------------------------------------------------------------------------tggttgcatggcttcgttctctttagccttcgctgtttgtggctttgttatgtgaccaagcacttgctatactgtctatttgttcgcaggtgattgcaggtctgtggatcctct---------ctgcccttgggagctgctgcaatttcctcaccttgttctacataggtaatgtgcttcgctgctacagcctgaacttg--------cagatgtgcagtaactgtacctagcattgtttacccat------------------------tctcgctttcttacttgcagtcttcatggttctctacactgtgccNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") | |
1669 inFileHandle.write(">IRELAND\n") | |
1670 inFileHandle.write("NNNTATAGCTCCTAACATTCCTGAAGTGACGATTCCAGAGGACACGATTGTGAACATTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTCTTTACTTTGATGGAGATTGGCCAGGGCCGTGATCTGAAGAAATTCCTCATTGTATGT---------------------------TTGTTTATCTCCTCAATTTCAATTTGGCCATGCTTAATGTTGGGTGCTTTCTGTATAGCCTGCTCACCAAGGTGTGATCTCTTCTTTGTATACACAGGTGGTTGCTGG---------------------------------------------------------------------------------------------------CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACNTTGGCATATATAGGTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") | |
1671 inFileHandle.write(">NEMOF\n") | |
1672 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtcaatgttgcccgctcgctgagatatgagatcaacaggggctttgctagcctgagggcgattggtcaaggccgtgacctgaagaaattcctgattgtacgt---------------------------ttaat---------------------------------------------------------------------------------------------tggttgcatggcttcgttctctttagccttcgctgtttgtggctttgttatgtgaccaagcacttgctatactgtctatttgttcgcaggtgattgcaggtctgtggatcctct---------ctgcccttgggNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") | |
1673 inFileHandle.write(">NEMOH\n") | |
1674 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtcaatgttgcccgctcgctgagatatgagatcaacaggggctttgctagcctgagggcgattggtcaaggccgtgacctgaagaaattcctgattgtacgt---------------------------ttaat---------------------------------------------------------------------------------------------tggttgcatggcttcgttctctttagccttcgctgtttgtggctttgttatgtgaccaagcacttgctatactgtctatttgttcgcaggtgattgcaggtctgtggatcctct---------ctgcccttgggagctgctgcaatttcctcaccttgttctacataggtaatgtgcttcgctgctacagcctgaacttg--------cagatgtgcagtaactgtacctagcattgtttacccat------------------------tctcgctttcttacNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") | |
1675 inFileHandle.write(">POLAND\n") | |
1676 inFileHandle.write("NNNTATAGCTCCTAACATTCCTGAAGTGAAGATCACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTAYGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGTTGCATGGCTTCGTTCTCTTTAGCCTTCGCTGTTTGTGGCTTTGTTATGTGACCAAGCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") | |
1677 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") | |
1678 inFileHandle.write(">SPAIN\n") | |
1679 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtcaacattgcccgctcgctgagatatgagatcaacaggggcttctttactttgaaggagatcggtcagggccgtgatctgaagaaattcctcattgtatgttctggttactcttcaatttgggcatgcttaat---------------------------------gttgggtgctttctttat--cctgctcaccaacatgtgatctgttctttgtatgctcaggtggttgccgg---------------------------------------------------------------------------------------------------cctctgggttctttctgttcttgggagctcttgcaacttcttgacattggcatatataggtaat------------------tttaacttgtgctgcaacacttgagttcataaccaccctag------ttgtccatacgagttgtgaactgatgacatccgttctttttcccgagtgcagtcttcgtggtgctctacacggtgccagttctgtatgagaagtacgacgacaaggttgatgcttttggtgagaag\n") | |
1680 inFileHandle.write(">TRANSATE\n") | |
1681 inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNcgctcgctgagatatgagatcaacaggggcttctttactttgaaggagatYggccagggtcgcgacctcaagaaattcctcattgtatgttgcttgt-ctcttcaatttcaacatgcttgat---------------------------------gttgggtgctttctttat--cctgctcaccaacatgtgatctcttctttgtatgctcaggtggttgcggg---------------------------------------------------------------------------------------------------tctctgggttctttctgttcttgggagctcttgcaacttcttgacattggcatatataggtaaK------------------tataRcttgtgctgcaacacttgagttcataaccNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") | |
1682 inFileHandle.write(">VIGOR\n") | |
1683 inFileHandle.write("NNNTATAGCTCCTAACATTCCTGAAGTGAAGATCACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTACGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGTTGCATGGCTTCGTTCTCTTTAGCCTTCGCTGTTTGTGGCTTTGTTATGTGACCAAGCACTTGCTATACTGTCTATTTGTTCGCAGGTGATTGCAGGTCTGTGGATCCTCT---------CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTACGAGAAGTACGAGGACAAGATCGATGCTTTTGGAGAGAAG\n") | |
1684 inFileHandle.close() | |
1685 | |
def _writeRealExpAlleleFile(self):
    """Create the expected allele CSV listing five allele values.

    The file at ``self._expAlleleFileName`` is overwritten with the header
    row and one row each for ``G``, ``T``, ``A``, ``C`` and ``-``,
    numbered 1 to 5.
    """
    rows = (
        "AlleleNumber;Value;Motif;NbCopy;Comment\n",
        "1;G;;;\n",
        "2;T;;;\n",
        "3;A;;;\n",
        "4;C;;;\n",
        "5;-;;;\n",
    )
    outHandle = open(self._expAlleleFileName, "w")
    outHandle.writelines(rows)
    outHandle.close()
1695 | |
def _writeRealExpSequenceCSVFile(self):
    """Create the expected sequence CSV for the ``PpHDZ31_ref`` reference.

    The file at ``self._expSequenceCSVFileName`` is overwritten with the
    header row and a single description row.
    """
    rows = (
        "SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n",
        "PpHDZ31_ref;Reference;;;;;Pinus pinaster\n",
    )
    csvHandle = open(self._expSequenceCSVFileName, "w")
    csvHandle.writelines(rows)
    csvHandle.close()
1701 | |
def _writeRealExpBatchFile(self):
    """Create the expected batch file for the ``PpHDZ31`` gene.

    The file at ``self._expBatchFileName`` is overwritten with a single
    batch record of ``Key: value`` lines terminated by a ``//`` line.
    """
    recordLines = (
        "BatchNumber: 1\n",
        "BatchName: INRA_Pinus_pinaster_HDZ31-1\n",
        "GeneName: PpHDZ31\n",
        "Description: \n",
        "ContactNumber: 1\n",
        "ProtocolNumber: 1\n",
        "ThematicNumber: 1\n",
        "RefSeqName: PpHDZ31_ref\n",
        "AlignmentFileName: \n",
        "SeqName: \n",
        "//\n",
    )
    outHandle = open(self._expBatchFileName, "w")
    outHandle.writelines(recordLines)
    outHandle.close()
1716 | |
1717 | |
def _writeInputFileSeveralBatches(self):
    """Create the two-file input directory used for several batches.

    Ensures the directory ``self._inputDirSeveralBatches`` exists, then
    overwrites ``Gene1.fasta`` and ``Gene2.fasta`` inside it. Each file
    holds its own reference (``Sequence_de_Reference1`` /
    ``Sequence_de_Reference2``) followed by ``Line1`` and ``Line2``; the
    last sequence of each file has no trailing newline.
    """
    batchDir = self._inputDirSeveralBatches
    if not FileUtils.isRessourceExists(batchDir):
        os.mkdir(batchDir)

    gene1Chunks = (
        ">Sequence_de_Reference1\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    gene1Handle = open(batchDir + "/Gene1.fasta", "w")
    gene1Handle.writelines(gene1Chunks)
    gene1Handle.close()

    gene2Chunks = (
        ">Sequence_de_Reference2\n",
        "C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    gene2Handle = open(batchDir + "/Gene2.fasta", "w")
    gene2Handle.writelines(gene2Chunks)
    gene2Handle.close()
1739 | |
def _writeInputFileSeveralBatches_different_lines_between_files(self):
    """Create the two-file input directory where each file has its own lines.

    Ensures the directory ``self._inputDirSeveralBatches`` exists, then
    overwrites ``Gene1.fasta`` (reference ``Sequence_de_Reference1`` with
    ``Line1`` and ``Line2``) and ``Gene2.fasta`` (reference
    ``Sequence_de_Reference2`` with ``Line3`` and ``Line4``). The last
    sequence of each file has no trailing newline.
    """
    batchDir = self._inputDirSeveralBatches
    if not FileUtils.isRessourceExists(batchDir):
        os.mkdir(batchDir)

    gene1Chunks = (
        ">Sequence_de_Reference1\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    gene1Handle = open(batchDir + "/Gene1.fasta", "w")
    gene1Handle.writelines(gene1Chunks)
    gene1Handle.close()

    gene2Chunks = (
        ">Sequence_de_Reference2\n",
        "C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line3\n",
        "C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line4\n",
        "CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    gene2Handle = open(batchDir + "/Gene2.fasta", "w")
    gene2Handle.writelines(gene2Chunks)
    gene2Handle.close()
1761 | |
def _writeInputFileSeveralBatches_different_lines_and_same_refseq_between_files(self):
    """Create the two-file input directory where both files share a reference name.

    Ensures the directory ``self._inputDirSeveralBatches`` exists, then
    overwrites ``Gene1.fasta`` (``Line1`` and ``Line2``) and
    ``Gene2.fasta`` (``Line3`` and ``Line4``). Both files name their
    reference ``Sequence_de_Reference1``, with different sequences. The
    last sequence of each file has no trailing newline.
    """
    batchDir = self._inputDirSeveralBatches
    if not FileUtils.isRessourceExists(batchDir):
        os.mkdir(batchDir)

    gene1Chunks = (
        ">Sequence_de_Reference1\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    gene1Handle = open(batchDir + "/Gene1.fasta", "w")
    gene1Handle.writelines(gene1Chunks)
    gene1Handle.close()

    gene2Chunks = (
        ">Sequence_de_Reference1\n",
        "C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line3\n",
        "C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line4\n",
        "CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    gene2Handle = open(batchDir + "/Gene2.fasta", "w")
    gene2Handle.writelines(gene2Chunks)
    gene2Handle.close()
1783 | |
1784 | |
# Run the unittest command-line runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()