Mercurial > repos > yufei-luo > s_mart
view commons/core/parsing/test/Test_Multifasta2SNPFile.py @ 9:1eb55963fe39
Updated CompareOverlappingSmall*.py
author | m-zytnicki |
---|---|
date | Thu, 14 Mar 2013 05:23:05 -0400 |
parents | 769e306b7933 |
children |
line wrap: on
line source
import os import shutil import unittest from commons.core.utils.FileUtils import FileUtils from commons.core.parsing.Multifasta2SNPFile import Multifasta2SNPFile from commons.core.parsing.Multifasta2SNPFile import ReferenceBioseqAndLinesBioseqDBWrapper from commons.core.seq.Bioseq import Bioseq from commons.core.seq.BioseqDB import BioseqDB from smac_pipe.tests.Utils4Test import Utils4Test class Test_Multifasta2SNPFile(unittest.TestCase): # TODO TEST LOGFILE def setUp(self): os.chdir("%s/commons/core/parsing/test/" % os.environ["REPET_PATH"]) self._inFileName = "multifasta_input.fasta" self._expSubSNPFileName = "%s/commons/core/parsing/test/expSubSNP.csv" % os.environ["REPET_PATH"] self._expAlleleFileName = "%s/commons/core/parsing/test/expAllele.csv" % os.environ["REPET_PATH"] self._expIndividualFileName = "%s/commons/core/parsing/test/expIndividual.csv" % os.environ["REPET_PATH"] self._expSequenceFSAFileName = "%s/commons/core/parsing/test/expSequences.fsa" % os.environ["REPET_PATH"] self._expSequenceCSVFileName = "%s/commons/core/parsing/test/expSequences.csv" % os.environ["REPET_PATH"] self._expBatchFileName = "%s/commons/core/parsing/test/expBatch.txt" % os.environ["REPET_PATH"] self._expBatchLineFileName = "%s/commons/core/parsing/test/expBatchLine.csv" % os.environ["REPET_PATH"] self._realInputFileName = "data/real_multifasta_input.fasta" self._realExpSubSNPFileName = "data/realExpSubSNP.csv" self._realExpSequenceFSAFileName = "data/realExpSequences.fsa" self._realExpBatchLineFileName = "data/realExpBatchLine.csv" self._realExpIndividualFileName = "data/realExpIndividual.csv" self._inputDirSeveralBatches = "%s/commons/core/parsing/test/severalBatchDir" % os.environ["REPET_PATH"] self._obsSubSNPFileName = "SubSNP.csv" self._obsAlleleFileName = "Allele.csv" self._obsIndividualFileName = "Individual.csv" self._obsSequenceFSAFileName = "Sequences.fsa" self._obsSequenceCSVFileName = "Sequences.csv" self._obsBatchFileName = "Batch.txt" self._obsBatchLineFileName = 
"BatchLine.csv" self._fileUtils = FileUtils() def tearDown(self): os.chdir("%s/commons/core/parsing/test/" % os.environ["REPET_PATH"]) logFileName = "multifasta2SNP.log" if self._fileUtils.isRessourceExists(self._inFileName): os.remove(self._inFileName) if self._fileUtils.isRessourceExists(self._obsSubSNPFileName): os.remove(self._obsSubSNPFileName) if self._fileUtils.isRessourceExists(self._obsSubSNPFileName + "_filtered"): os.remove(self._obsSubSNPFileName + "_filtered") if self._fileUtils.isRessourceExists(self._obsAlleleFileName): os.remove(self._obsAlleleFileName) if self._fileUtils.isRessourceExists(self._obsIndividualFileName): os.remove(self._obsIndividualFileName) if self._fileUtils.isRessourceExists(self._obsSequenceFSAFileName): os.remove(self._obsSequenceFSAFileName) if self._fileUtils.isRessourceExists(self._obsSequenceCSVFileName): os.remove(self._obsSequenceCSVFileName) if self._fileUtils.isRessourceExists(self._obsBatchFileName): os.remove(self._obsBatchFileName) if self._fileUtils.isRessourceExists(self._obsBatchLineFileName): os.remove(self._obsBatchLineFileName) if self._fileUtils.isRessourceExists(self._expSubSNPFileName): os.remove(self._expSubSNPFileName) if self._fileUtils.isRessourceExists(self._realExpSubSNPFileName + "_filtered"): os.remove(self._realExpSubSNPFileName + "_filtered") if self._fileUtils.isRessourceExists(self._expAlleleFileName): os.remove(self._expAlleleFileName) if self._fileUtils.isRessourceExists(self._expIndividualFileName): os.remove(self._expIndividualFileName) if self._fileUtils.isRessourceExists(self._expSequenceFSAFileName): os.remove(self._expSequenceFSAFileName) if self._fileUtils.isRessourceExists(self._expSequenceCSVFileName): os.remove(self._expSequenceCSVFileName) if self._fileUtils.isRessourceExists(self._expBatchFileName): os.remove(self._expBatchFileName) if self._fileUtils.isRessourceExists(self._expBatchLineFileName): os.remove(self._expBatchLineFileName) if 
self._fileUtils.isRessourceExists(logFileName): os.remove(logFileName) if self._fileUtils.isRessourceExists(self._inputDirSeveralBatches): shutil.rmtree(self._inputDirSeveralBatches) def test_runOneBatch(self): self._writeInputFile() self._writeExpSubSNPFile() self._writeExpAlleleFile() self._writeExpIndividualFile() self._writeExpSequenceFile() self._writeExpBatchFile() self._writeExpBatchLineFile() multifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "Batch1", "methyltransferase") multifasta2SNPFile.runOneBatch(self._inFileName) self.assertTrue(FileUtils.isRessourceExists(self._obsAlleleFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expAlleleFileName, self._obsAlleleFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsIndividualFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expIndividualFileName, self._obsIndividualFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsSequenceFSAFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expSequenceFSAFileName, self._obsSequenceFSAFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsSequenceCSVFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expSequenceCSVFileName, self._obsSequenceCSVFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsBatchFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expBatchFileName, self._obsBatchFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsBatchLineFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expBatchLineFileName, self._obsBatchLineFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsSubSNPFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expSubSNPFileName, self._obsSubSNPFileName)) def test_runOneBatch_with_a_real_input_file(self): self._writeRealExpAlleleFile() self._writeRealExpSequenceCSVFile() self._writeRealExpBatchFile() multifasta2SNPFile = Multifasta2SNPFile("Pinus pinaster", "INRA_Pinus_pinaster_HDZ31-1", 
"PpHDZ31") multifasta2SNPFile.runOneBatch(self._realInputFileName) self.assertTrue(FileUtils.isRessourceExists(self._obsIndividualFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._realExpIndividualFileName, self._obsIndividualFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsSequenceFSAFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._realExpSequenceFSAFileName, self._obsSequenceFSAFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsSequenceCSVFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expSequenceCSVFileName, self._obsSequenceCSVFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsBatchFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expBatchFileName, self._obsBatchFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsBatchLineFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._realExpBatchLineFileName, self._obsBatchLineFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsAlleleFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expAlleleFileName, self._obsAlleleFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsSubSNPFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._realExpSubSNPFileName , self._obsSubSNPFileName)) def test_runOneBatch_with_errors_in_refSeq(self): self._writeInputFileWithSeqErrorsInRefSeq() multifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "Batch1", "methyltransferase") self.assertRaises(Exception, multifasta2SNPFile.runOneBatch, self._inFileName, self._obsSubSNPFileName) def test_runOneBatch_with_errors_in_lineSeq(self): self._writeInputFileWithSeqErrorsInOneLineSeq() multifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "Batch1", "methyltransferase") self.assertRaises(Exception, multifasta2SNPFile.runOneBatch, self._inFileName, self._obsSubSNPFileName) def test_runOneBatch_with_a_several_lineSeq(self): self._writeInputFileWithASeveralLineSeq() 
self._writeExpSubSNPFileSeveralLineSeq() self._writeExpAlleleFile() self._writeExpIndividualFile() self._writeExpSequenceFileSeveralLineSeq() self._writeExpBatchFile() self._writeExpBatchLineFile() multifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana", "Batch1", "methyltransferase") multifasta2SNPFile.runOneBatch(self._inFileName) self.assertTrue(FileUtils.isRessourceExists(self._obsSubSNPFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expSubSNPFileName, self._obsSubSNPFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsAlleleFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expAlleleFileName, self._obsAlleleFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsIndividualFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expIndividualFileName, self._obsIndividualFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsSequenceFSAFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expSequenceFSAFileName, self._obsSequenceFSAFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsSequenceCSVFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expSequenceCSVFileName, self._obsSequenceCSVFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsBatchFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expBatchFileName, self._obsBatchFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsBatchLineFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expBatchLineFileName, self._obsBatchLineFileName)) def test_runOneBatch_with_2_seqs_with_the_same_name(self): self._writeInputFileWith2SeqsWithTheSameName() batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" isSysExitRaised = False multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) try: multifasta2SNPFile.runOneBatch(self._inFileName) except SystemExit: isSysExitRaised = True self.assertTrue(isSysExitRaised) def 
test_runOneBatch_with_indels_and_snps(self): self._writeInputFileWithSnpsAndIndels() self._writeExpSubSNPFileWithSnpsAndIndels() self._writeExpAlleleFileWithSnpsAndIndels() self._writeExpIndividualFile() self._writeExpSequenceFileWithDeletion() self._writeExpBatchFile() self._writeExpBatchLineFile() batchName = "Batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) multifasta2SNPFile.runOneBatch(self._inFileName) self.assertTrue(FileUtils.isRessourceExists(self._obsIndividualFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expIndividualFileName, self._obsIndividualFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsSequenceFSAFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expSequenceFSAFileName, self._obsSequenceFSAFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsSequenceCSVFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expSequenceCSVFileName, self._obsSequenceCSVFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsBatchFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expBatchFileName, self._obsBatchFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsBatchLineFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expBatchLineFileName, self._obsBatchLineFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsAlleleFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expAlleleFileName, self._obsAlleleFileName)) self.assertTrue(FileUtils.isRessourceExists(self._obsSubSNPFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expSubSNPFileName, self._obsSubSNPFileName)) def test_runOneBatchWithPotentialDooblons(self): self._writeInputFileBatchWithPotentialDooblons() batchName = "Batch_AU247387" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) 
multifasta2SNPFile.runOneBatch(self._inFileName) self.assertTrue(FileUtils.isRessourceExists(self._obsSubSNPFileName)) expSubSNPFile = "data/ExpPotDooblonsSubSNP.csv" Utils4Test.removeOneSpecifiedColumn(expSubSNPFile, ";", 8) Utils4Test.removeOneSpecifiedColumn(self._obsSubSNPFileName, ";", 8) Utils4Test.removeOneSpecifiedColumn(expSubSNPFile + "_filtered", ";", 9) Utils4Test.removeOneSpecifiedColumn(self._obsSubSNPFileName + "_filtered", ";", 9) Utils4Test.removeOneSpecifiedColumn(expSubSNPFile + "_filtered_filtered", ";", 13) Utils4Test.removeOneSpecifiedColumn(self._obsSubSNPFileName + "_filtered_filtered", ";", 13) comparableExpSubSNPFile = expSubSNPFile + "_filtered_filtered_filtered" comparableObsSubSNPFile = self._obsSubSNPFileName + "_filtered_filtered_filtered" self.assertTrue(FileUtils.isRessourceExists(comparableExpSubSNPFile)) self.assertTrue(FileUtils.isRessourceExists(comparableObsSubSNPFile)) self.assertTrue(FileUtils.are2FilesIdentical(comparableExpSubSNPFile, comparableObsSubSNPFile)) if(self._fileUtils.isRessourceExists(self._obsSubSNPFileName + "_filtered")): os.remove(self._obsSubSNPFileName + "_filtered") if(self._fileUtils.isRessourceExists(expSubSNPFile + "_filtered")): os.remove(expSubSNPFile + "_filtered") if(self._fileUtils.isRessourceExists(self._obsSubSNPFileName + "_filtered_filtered")): os.remove(self._obsSubSNPFileName + "_filtered_filtered") if(self._fileUtils.isRessourceExists(expSubSNPFile + "_filtered_filtered")): os.remove(expSubSNPFile + "_filtered_filtered") if self._fileUtils.isRessourceExists(comparableExpSubSNPFile): os.remove(comparableExpSubSNPFile) if self._fileUtils.isRessourceExists(comparableObsSubSNPFile): os.remove(comparableObsSubSNPFile) def test_runSeveralBatches(self): self._writeInputFileSeveralBatches() self._writeExpSubSNPFileSeveralBatches() self._writeExpAlleleFileSeveralBatches() self._writeExpIndividualFile() self._writeExpSequenceSeveralBatches() self._writeExpBatchFileSeveralBatches() 
self._writeExpBatchLineFileSeveralBatches() multifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana") multifasta2SNPFile.runSeveralBatches(self._inputDirSeveralBatches) self.assertTrue(FileUtils.isRessourceExists(self._inputDirSeveralBatches + "/" + self._obsAlleleFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expAlleleFileName, self._inputDirSeveralBatches + "/" + self._obsAlleleFileName)) self.assertTrue(FileUtils.isRessourceExists(self._inputDirSeveralBatches + "/" +self._obsIndividualFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expIndividualFileName, self._inputDirSeveralBatches + "/" + self._obsIndividualFileName)) self.assertTrue(FileUtils.isRessourceExists(self._inputDirSeveralBatches + "/" + self._obsSequenceFSAFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expSequenceFSAFileName, self._inputDirSeveralBatches + "/" + self._obsSequenceFSAFileName)) self.assertTrue(FileUtils.isRessourceExists(self._inputDirSeveralBatches + "/" + self._obsSequenceCSVFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expSequenceCSVFileName, self._inputDirSeveralBatches + "/" + self._obsSequenceCSVFileName)) self.assertTrue(FileUtils.isRessourceExists(self._inputDirSeveralBatches + "/" + self._obsBatchFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expBatchFileName, self._inputDirSeveralBatches + "/" + self._obsBatchFileName)) self.assertTrue(FileUtils.isRessourceExists(self._inputDirSeveralBatches + "/" + self._obsBatchLineFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expBatchLineFileName, self._inputDirSeveralBatches + "/" + self._obsBatchLineFileName)) self.assertTrue(FileUtils.isRessourceExists(self._inputDirSeveralBatches + "/" + self._obsSubSNPFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expSubSNPFileName, self._inputDirSeveralBatches + "/" + self._obsSubSNPFileName)) def test_runSeveralBatches_different_lines_between_files(self): 
self._writeInputFileSeveralBatches_different_lines_between_files() self._writeExpSubSNPFileSeveralBatches_different_lines_between_files() self._writeExpAlleleFileSeveralBatches() self._writeExpIndividualFile_different_lines_between_files() self._writeExpSequenceSeveralBatches() self._writeExpBatchFileSeveralBatches() self._writeExpBatchLineFileSeveralBatches_different_lines_between_files() multifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana") multifasta2SNPFile.runSeveralBatches(self._inputDirSeveralBatches) self.assertTrue(FileUtils.isRessourceExists(self._inputDirSeveralBatches + "/" + self._obsAlleleFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expAlleleFileName, self._inputDirSeveralBatches + "/" + self._obsAlleleFileName)) self.assertTrue(FileUtils.isRessourceExists(self._inputDirSeveralBatches + "/" +self._obsIndividualFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expIndividualFileName, self._inputDirSeveralBatches + "/" + self._obsIndividualFileName)) self.assertTrue(FileUtils.isRessourceExists(self._inputDirSeveralBatches + "/" + self._obsSequenceFSAFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expSequenceFSAFileName, self._inputDirSeveralBatches + "/" + self._obsSequenceFSAFileName)) self.assertTrue(FileUtils.isRessourceExists(self._inputDirSeveralBatches + "/" + self._obsSequenceCSVFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expSequenceCSVFileName, self._inputDirSeveralBatches + "/" + self._obsSequenceCSVFileName)) self.assertTrue(FileUtils.isRessourceExists(self._inputDirSeveralBatches + "/" + self._obsBatchFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expBatchFileName, self._inputDirSeveralBatches + "/" + self._obsBatchFileName)) self.assertTrue(FileUtils.isRessourceExists(self._inputDirSeveralBatches + "/" + self._obsBatchLineFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expBatchLineFileName, self._inputDirSeveralBatches + "/" + 
self._obsBatchLineFileName)) self.assertTrue(FileUtils.isRessourceExists(self._inputDirSeveralBatches + "/" + self._obsSubSNPFileName)) self.assertTrue(FileUtils.are2FilesIdentical(self._expSubSNPFileName, self._inputDirSeveralBatches + "/" + self._obsSubSNPFileName)) def test_runSeveralBatches_different_lines_and_same_refseq_between_files(self): self._writeInputFileSeveralBatches_different_lines_and_same_refseq_between_files() self._writeExpSubSNPFileSeveralBatches_different_lines_between_files() self._writeExpAlleleFileSeveralBatches() self._writeExpIndividualFile_different_lines_between_files() self._writeExpSequenceSeveralBatchesForSameRefSeq() self._writeExpBatchFileSeveralBatchesForSameRefSeq() self._writeExpBatchLineFileSeveralBatches_different_lines_between_files() multifasta2SNPFile = Multifasta2SNPFile("Arabidopsis thaliana") try: multifasta2SNPFile.runSeveralBatches(self._inputDirSeveralBatches) except Exception, e : self.assertRaises(Exception, e) def test_detectSNPAndIndels(self): refBioseq = Bioseq() alignedBioseqDB = BioseqDB() batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) refBioseq.sequence = "ATTCGCGTATGCGTATGCTT" refBioseq.header = "reference" bs1 = Bioseq( "line1", "ATCCGCGTATGCGTATGATT" ) bs2 = Bioseq( "line2", "ATTCGTGTATGCGTATGGTT" ) alignedBioseqDB.setData( [ bs1, bs2 ] ) multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName) multifasta2SNPFile._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"} multifasta2SNPFile.detectSNPsAndIndels(multifasta2SNPFile._wrapper) dExpAllele = {'C': 1, 'A': 2, 'T': 3, 'G': 4 } lExpSNP = [{'subSNPName': batchName + "_SNP_3_line1", 'position': 3, 'lineName': 1, 'allele': 1, '5flank': "AT", '3flank': "CGCGTATGCGTATGATT", 'batchNumber': 1, 
'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_SNP_3_line2", 'position': 3, 'lineName': 2, 'allele': 3, '5flank': "AT", '3flank': "CGTGTATGCGTATGGTT", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_SNP_6_line2", 'position': 6, 'lineName': 2, 'allele': 3, '5flank': "ATTCG", '3flank': "GTATGCGTATGGTT", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_SNP_6_line1", 'position': 6, 'lineName': 1, 'allele': 1, '5flank': "ATCCG", '3flank': "GTATGCGTATGATT",'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_SNP_18_line1", 'position': 18, 'lineName': 1, 'allele': 2, '5flank': "ATCCGCGTATGCGTATG", '3flank': "TT", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_SNP_18_line2", 'position': 18, 'lineName': 2, 'allele': 4, '5flank': "ATTCGTGTATGCGTATG", '3flank': "TT", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}] lExpIndividual = [{'individualNumber': 1, 'individualName': "line1", 'scientificName': "Arabidopsis thaliana"}, {'individualNumber': 2, 'individualName': "line2", 'scientificName': "Arabidopsis thaliana"},] self.assertEquals(multifasta2SNPFile._sortSubSNPResultByBatchPositionAndLineName(lExpSNP), multifasta2SNPFile._lSubSNPFileResults) self.assertEquals(dExpAllele, multifasta2SNPFile._dAlleleFileResults) self.assertEquals(lExpIndividual, multifasta2SNPFile._lIndividualFileResults) def test_detectSNPAndIndels_no_polym(self): refBioseq = Bioseq() alignedBioseqDB = BioseqDB() batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) refBioseq.sequence = "ATTCGCGTATGCGTATGCTT" refBioseq.header = "reference" bs1 = Bioseq( "line1", "ATTCGCGTATGCGTATGCTT" ) bs2 = Bioseq( "line2", "ATTCGCGTATGCGTATGCTT" ) 
alignedBioseqDB.setData( [ bs1, bs2 ] ) instance = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName) multifasta2SNPFile.detectSNPsAndIndels(instance) lExpSNP = [] self.assertEquals(lExpSNP, multifasta2SNPFile._lSubSNPFileResults) def test_detectSNPAndIndels_with_only_dels(self): refBioseq = Bioseq() alignedBioseqDB = BioseqDB() batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) refBioseq.sequence = "ATTACCGAA" refBioseq.header = "reference" bs1 = Bioseq( "line1", "A--ACCGAA" ) bs2 = Bioseq( "line2", "---ACCGAA" ) alignedBioseqDB.setData( [ bs1, bs2 ] ) multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName) multifasta2SNPFile._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"} multifasta2SNPFile.detectSNPsAndIndels(multifasta2SNPFile._wrapper) dExpAllele = {'A--': 1, '---': 2} lExpSNP = [{'subSNPName': batchName + "_DEL_1_line2", 'position': 1, 'lineName': 2, 'allele': 2, '5flank': "", '3flank': "ACCGAA", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 3}, {'subSNPName': batchName + "_DEL_1_line1", 'position': 1, 'lineName': 1, 'allele': 1, '5flank': "", '3flank': "ACCGAA", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 3}] lExpIndividual = [{'individualNumber': 1, 'individualName': "line1", 'scientificName': "Arabidopsis thaliana"}, {'individualNumber': 2, 'individualName': "line2", 'scientificName': "Arabidopsis thaliana"}] self.assertEquals(dExpAllele, multifasta2SNPFile._dAlleleFileResults) self.assertEquals(multifasta2SNPFile._sortSubSNPResultByBatchPositionAndLineName(lExpSNP), multifasta2SNPFile._lSubSNPFileResults) self.assertEquals(lExpIndividual, 
multifasta2SNPFile._lIndividualFileResults) def test_detectSNPAndIndels_with_dels_and_snps(self): refBioseq = Bioseq() alignedBioseqDB = BioseqDB() batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) refBioseq.sequence = "ATTACCGAA" refBioseq.header = "reference" bs1 = Bioseq( "line1", "A--ACCGAA" ) bs2 = Bioseq( "line2", "---ACCGAA" ) bs3 = Bioseq( "line3", "ATTACCGGA" ) bs4 = Bioseq( "line4", "----CCGAA" ) alignedBioseqDB.setData( [ bs1, bs2, bs3, bs4 ] ) multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName) multifasta2SNPFile._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"} multifasta2SNPFile.detectSNPsAndIndels(multifasta2SNPFile._wrapper) dExpAllele = {'G': 1, 'A--A': 2, '---A': 3, '----': 4, 'ATTA': 5, 'A': 6} lExpSNP = [{'subSNPName': batchName + "_DEL_1_line2", 'position': 1, 'lineName': 2, 'allele': 3, '5flank': "", '3flank': "CCGAA", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4}, {'subSNPName': batchName + "_DEL_1_line1", 'position': 1, 'lineName': 1, 'allele': 2, '5flank': "", '3flank': "CCGAA",'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4}, {'subSNPName': batchName + "_SNP_8_line3", 'position': 8, 'lineName': 3, 'allele': 1, '5flank': "ATTACCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_SNP_8_line1", 'position': 8, 'lineName': 1, 'allele': 6, '5flank': "A--ACCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_SNP_8_line2", 'position': 8, 'lineName': 2, 'allele': 6, '5flank': "---ACCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': 
batchName + "_SNP_8_line4", 'position': 8, 'lineName': 4, 'allele': 6, '5flank': "----CCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_DEL_1_line4", 'position': 1, 'lineName': 4, 'allele': 4, '5flank': "", '3flank': "CCGAA",'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4}, {'subSNPName': batchName + "_DEL_1_line3", 'position': 1, 'lineName': 3, 'allele': 5, '5flank': "", '3flank': "CCGGA",'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4}] lExpIndividual = [{'individualNumber': 1, 'individualName': "line1", 'scientificName': "Arabidopsis thaliana"}, {'individualNumber': 2, 'individualName': "line2", 'scientificName': "Arabidopsis thaliana"}, {'individualNumber': 3, 'individualName': "line3", 'scientificName': "Arabidopsis thaliana"}, {'individualNumber': 4, 'individualName': "line4", 'scientificName': "Arabidopsis thaliana"}] self.assertEquals(dExpAllele, multifasta2SNPFile._dAlleleFileResults) self.assertEquals(multifasta2SNPFile._sortSubSNPResultByBatchPositionAndLineName(lExpSNP), multifasta2SNPFile._lSubSNPFileResults) self.assertEquals(lExpIndividual, multifasta2SNPFile._lIndividualFileResults) def test_detectSNPAndIndels_with_only_inserts(self): refBioseq = Bioseq() alignedBioseqDB = BioseqDB() batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) refBioseq.sequence = "A--ACCGAA" refBioseq.header = "reference" bs1 = Bioseq( "line1", "A--ACCGAA" ) bs2 = Bioseq( "line2", "AG-ACCGAA" ) bs3 = Bioseq( "line3", "ATTACCGAA" ) alignedBioseqDB.setData( [ bs1, bs2, bs3 ] ) multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName) multifasta2SNPFile._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': 
"Sequence_de_Reference"} multifasta2SNPFile.detectSNPsAndIndels(multifasta2SNPFile._wrapper) dExpAllele = {'G-': 1, 'TT': 2, '--': 3} lExpSNP = [{'subSNPName': batchName + "_INS_1_line2", 'position': 1, 'lineName': 2, 'allele': 1, '5flank': "A", '3flank': "ACCGAA", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "INSERTION", 'length': 2}, {'subSNPName': batchName + "_INS_1_line3", 'position': 1, 'lineName': 3, 'allele': 2, '5flank': "A", '3flank': "ACCGAA", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "INSERTION", 'length': 2}, {'subSNPName': batchName + "_INS_1_line1", 'position': 1, 'lineName': 1, 'allele': 3, '5flank': "A", '3flank': "ACCGAA", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "INSERTION", 'length': 2}] lExpIndividual = [{'individualNumber': 1, 'individualName': "line1", 'scientificName': "Arabidopsis thaliana"}, {'individualNumber': 2, 'individualName': "line2", 'scientificName': "Arabidopsis thaliana"}, {'individualNumber': 3, 'individualName': "line3", 'scientificName': "Arabidopsis thaliana"}] self.assertEquals(dExpAllele, multifasta2SNPFile._dAlleleFileResults) self.assertEquals(multifasta2SNPFile._sortSubSNPResultByBatchPositionAndLineName(lExpSNP), multifasta2SNPFile._lSubSNPFileResults) self.assertEquals(lExpIndividual, multifasta2SNPFile._lIndividualFileResults) def test_detectSNPAndIndels_with_snps_and_inserts(self): refBioseq = Bioseq() alignedBioseqDB = BioseqDB() batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) refBioseq.sequence = "A--ACCGAA" refBioseq.header = "reference" bs1 = Bioseq( "line1", "A--ACCGAA" ) bs2 = Bioseq( "line2", "AG-ACCGAA" ) bs3 = Bioseq( "line3", "ATTACCGCA" ) alignedBioseqDB.setData( [ bs1, bs2, bs3 ] ) multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName) multifasta2SNPFile._dBatchResults = {'BatchNumber': 1, 
'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"} multifasta2SNPFile.detectSNPsAndIndels(multifasta2SNPFile._wrapper) dExpAllele = {'C': 1, 'G-': 2, 'TT': 3, '--': 4, 'A' : 5} lExpSNP = [{'subSNPName': batchName + "_SNP_6_line3", 'position': 6, 'lineName': 3, 'allele': 1, '5flank': "ATTACCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_SNP_6_line1", 'position': 6, 'lineName': 1, 'allele': 5, '5flank': "A--ACCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_SNP_6_line2", 'position': 6, 'lineName': 2, 'allele': 5, '5flank': "AG-ACCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_INS_1_line2", 'position': 1, 'lineName': 2, 'allele': 2, '5flank': "A", '3flank': "ACCGAA", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "INSERTION", 'length': 2}, {'subSNPName': batchName + "_INS_1_line3", 'position': 1, 'lineName': 3, 'allele': 3, '5flank': "A", '3flank': "ACCGCA", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "INSERTION", 'length': 2}, {'subSNPName': batchName + "_INS_1_line1", 'position': 1, 'lineName': 1, 'allele': 4, '5flank': "A", '3flank': "ACCGAA", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "INSERTION", 'length': 2}] lExpIndividual = [{'individualNumber': 1, 'individualName': "line1", 'scientificName': "Arabidopsis thaliana"}, {'individualNumber': 2, 'individualName': "line2", 'scientificName': "Arabidopsis thaliana"}, {'individualNumber': 3, 'individualName': "line3", 'scientificName': "Arabidopsis thaliana"}] self.assertEquals(dExpAllele, multifasta2SNPFile._dAlleleFileResults) self.assertEquals(multifasta2SNPFile._sortSubSNPResultByBatchPositionAndLineName(lExpSNP), multifasta2SNPFile._lSubSNPFileResults) self.assertEquals(lExpIndividual, multifasta2SNPFile._lIndividualFileResults) def 
test_detectSNPAndIndels_with_snps_inserts_and_dels(self): refBioseq = Bioseq() alignedBioseqDB = BioseqDB() batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) refBioseq.sequence = "A--ACCGAATATAC" refBioseq.header = "reference" bs1 = Bioseq( "line1", "A--ACCGAATATAC" ) bs2 = Bioseq( "line2", "AG-ACCGAAT--AC" ) bs3 = Bioseq( "line3", "ATTACCGCA-----" ) alignedBioseqDB.setData( [ bs1, bs2, bs3 ] ) multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName) multifasta2SNPFile._dBatchResults = {'BatchNumber': 1, 'BatchName': "Batch1", 'GeneName': "methyltransferase", 'RefSeqName': "Sequence_de_Reference"} multifasta2SNPFile.detectSNPsAndIndels(multifasta2SNPFile._wrapper) dExpAllele = {'C': 1, 'G-': 2, 'T--AC': 3, 'TT': 4, '-----': 5, '--': 6, 'TATAC': 7, 'A': 8} lExpSNP = [{'subSNPName': batchName + "_SNP_6_line3", 'position': 6, 'lineName': 3, 'allele': 1, '5flank': "ATTACCG", '3flank': "A-----", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_SNP_6_line1", 'position': 6, 'lineName': 1, 'allele': 8, '5flank': "A--ACCG", '3flank': "ATATAC", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_SNP_6_line2", 'position': 6, 'lineName': 2, 'allele': 8, '5flank': "AG-ACCG", '3flank': "AT--AC", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_INS_1_line2", 'position': 1, 'lineName': 2, 'allele': 2, '5flank': "A", '3flank': "ACCGAAT--AC", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "INSERTION", 'length': 2}, {'subSNPName': batchName + "_INS_1_line3", 'position': 1, 'lineName': 3, 'allele': 4, '5flank': "A", '3flank': "ACCGCA-----", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "INSERTION", 'length': 2}, {'subSNPName': batchName + 
"_INS_1_line1", 'position': 1, 'lineName': 1, 'allele': 6, '5flank': "A", '3flank': "ACCGAATATAC", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "INSERTION", 'length': 2}, {'subSNPName': batchName + "_DEL_8_line2", 'position': 8, 'lineName': 2, 'allele': 3, '5flank': "AG-ACCGAA", '3flank': "", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 5}, {'subSNPName': batchName + "_DEL_8_line3", 'position': 8, 'lineName': 3, 'allele': 5, '5flank': "ATTACCGCA", '3flank': "", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 5}, {'subSNPName': batchName + "_DEL_8_line1", 'position': 8, 'lineName': 1, 'allele': 7, '5flank': "A--ACCGAA", '3flank': "", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 5}] lExpIndividual = [{'individualNumber': 1, 'individualName': "line1", 'scientificName': "Arabidopsis thaliana"}, {'individualNumber': 2, 'individualName': "line2", 'scientificName': "Arabidopsis thaliana"}, {'individualNumber': 3, 'individualName': "line3", 'scientificName': "Arabidopsis thaliana"}] self.assertEquals(dExpAllele, multifasta2SNPFile._dAlleleFileResults) self.assertEquals(multifasta2SNPFile._sortSubSNPResultByBatchPositionAndLineName(lExpSNP), multifasta2SNPFile._lSubSNPFileResults) self.assertEquals(lExpIndividual, multifasta2SNPFile._lIndividualFileResults) def test_createWrapperFromFile_with_upcase_and_lowcase_nucleotide(self): self._writeInputFileWithUpcaseAndLowcaseNucleotide() batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) expLineBioseqDB = BioseqDB() expRefBioseq = Bioseq("Sequence_de_Reference",\ "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA") iBioSeq = Bioseq("Line1","CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA") expLineBioseqDB.add ( iBioSeq ) iBioSeq = 
Bioseq("Line2","CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATACGCAGTAGCCAAACCTCCACAATA") expLineBioseqDB.add ( iBioSeq ) expBioseqDBWrapper = ReferenceBioseqAndLinesBioseqDBWrapper (expRefBioseq, expLineBioseqDB, multifasta2SNPFile._logFile, self._inFileName) obsBioseqDBWrapper = multifasta2SNPFile.createWrapperFromFile(self._inFileName) self.assertEquals(obsBioseqDBWrapper._iReferenceBioseq, expBioseqDBWrapper._iReferenceBioseq) self.assertEquals(obsBioseqDBWrapper._iLinesBioseqDB, expBioseqDBWrapper._iLinesBioseqDB) def test_checkHeaderAlphabet(self): # header ALPHABET [^a-zA-Z0-9_-:] batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) strToBeCheck="abcdefghijklmnopqrstuvwxyz0912834567_:-" self.assertTrue ( multifasta2SNPFile.checkHeaderAlphabet(strToBeCheck)) strToBeCheck="ABCDEFGHIJKLMNOPQRSTUVWXYZ0912834567_:-" self.assertTrue ( multifasta2SNPFile.checkHeaderAlphabet(strToBeCheck)) def test_checkHeaderAlphabet_empty_string(self): batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) strToBeCheck="" self.assertFalse ( multifasta2SNPFile.checkHeaderAlphabet(strToBeCheck)) def test_checkHeaderAlphabet_space(self): batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) strToBeCheck=" " self.assertFalse ( multifasta2SNPFile.checkHeaderAlphabet(strToBeCheck)) def test_checkHeaderAlphabet_non_aphabetical(self): batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) strToBeCheck="}" self.assertFalse ( multifasta2SNPFile.checkHeaderAlphabet(strToBeCheck)) def test_isDNA_bases( self ): batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, 
batchName, gene) strToBeCheck="TGTGGCTTCTAGTTGATCAGTTTATGATCACAATGATTTCACGTAGGTGTCTCGTGGCTCCGACTAATCAACAATATAATGCGAGTAGAGCTTGA" self.assertTrue ( multifasta2SNPFile.isDNA_bases(strToBeCheck)) def test_isDNA_bases_non_DNA_letter( self ): batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) strToBeCheck="XTAGTTGATCA" self.assertFalse ( multifasta2SNPFile.isDNA_bases(strToBeCheck)) def test_isDNA_bases_carriage_return( self ): batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) strToBeCheck="TA\nGTTGATCA" self.assertFalse ( multifasta2SNPFile.isDNA_bases(strToBeCheck)) def test_isDNA_bases_empty_string( self ): batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) strToBeCheck="" self.assertFalse ( multifasta2SNPFile.isDNA_bases(strToBeCheck)) def test_isDNA_bases_space( self ): batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) strToBeCheck=" " self.assertFalse ( multifasta2SNPFile.isDNA_bases(strToBeCheck)) def test_isDNA_bases_IUPAC_letter_but_non_DNA_bases( self ): batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) strToBeCheck="UMWSB" self.assertFalse ( multifasta2SNPFile.isDNA_bases(strToBeCheck)) def test_getLineAsAHeader (self): lineToBeCheck=">test on good header" batchName = "batch1" expHeader = "test_on_good_header" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) obsHeader = multifasta2SNPFile.getLineAsAHeader(lineToBeCheck) self.assertEqual(obsHeader,expHeader) def test_getLineAsAHeader_warning_bad_header_tag_omitted(self): 
lineToBeCheck="test on bad header with tag omitted" batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) try : expHeader = multifasta2SNPFile.getLineAsAHeader( lineToBeCheck ) except Exception, e : self.assertRaises(Exception, e , self._inFileName, self._obsSubSNPFileName) def test_getLineAsAHeader_warning_repeated_blanks_removed(self): lineToBeCheck =">test on header \twith warning" expHeader = "test_on_header_with_warning" batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) obsHeader = multifasta2SNPFile.getLineAsAHeader( lineToBeCheck ) self.assertEquals( obsHeader, expHeader) self.assertRaises(Exception, multifasta2SNPFile.getLineAsAHeader( lineToBeCheck ) , self._inFileName, self._obsSubSNPFileName) def test_getLineAsAHeader_fatal_error_bad_header(self): lineToBeCheck=">test\on bad header with fatal error" batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) try : expHeader = multifasta2SNPFile.getLineAsAHeader( lineToBeCheck ) except Exception, e : self.assertRaises(Exception, e , self._inFileName, self._obsSubSNPFileName) def test_isHeaderInRefSeqList(self): header = "line1" bs1 = Bioseq( "line1", "A--ACCGAATATAC" ) bs2 = Bioseq( "line2", "AG-ACCGAAT--AC" ) bs3 = Bioseq( "line3", "ATTACCGCA-----" ) batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) multifasta2SNPFile._lRefSequences = [bs1, bs2, bs3] try: isHeader = multifasta2SNPFile.isHeaderInRefSeqList(header) except Exception, e : self.assertRaises(Exception, e) def test_completeAlleleSetWithCurrentAllele_one_allele_added(self): dAlleleSetInInput = {"A" : 1, "T" : 2, "G" : 3} alleleToAdd = "C" dAlleleExpSet = {"A" : 1, "T" : 2, "G" : 3, "C" : 4} 
batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) dAlleleObsSet = multifasta2SNPFile._completeAlleleSetWithCurrentAllele(dAlleleSetInInput, alleleToAdd) self.assertEquals(dAlleleObsSet, dAlleleExpSet) def test_completeAlleleSetWithCurrentAllele_no_allele_added(self): dAlleleSetInInput = {"A" : 1, "T" : 2, "G" : 3} alleleToAdd = "T" dAlleleExpSet = {"A" : 1, "T" : 2, "G" : 3} batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) dAlleleObsSet = multifasta2SNPFile._completeAlleleSetWithCurrentAllele(dAlleleSetInInput, alleleToAdd) self.assertEquals(dAlleleObsSet, dAlleleExpSet) def test_completeAlleleSetWithCurrentAllele_with_an_empty_allele_set(self): dAlleleSetInInput = {} alleleToAdd = "T" dAlleleExpSet = {"T" : 1} batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) dAlleleObsSet = multifasta2SNPFile._completeAlleleSetWithCurrentAllele(dAlleleSetInInput, alleleToAdd) self.assertEquals(dAlleleObsSet, dAlleleExpSet) def test_completeBatchLineListWithCurrentIndividual(self): #TODO: this test only pass with a batchNumber of 1 iCurrentBatchNumber = 1 lBatchLineResults = [{'IndividualNumber': "1", 'BatchNumber': iCurrentBatchNumber}, {'IndividualNumber': "2", 'BatchNumber': iCurrentBatchNumber}] lIndividualResults = [{'individualNumber': 1, 'individualName': "Individual1", 'scientificName': "Arabidopsis thaliana"}, {'individualNumber': 2, 'individualName': "Individual2", 'scientificName': "Arabidopsis thaliana"}, {'individualNumber': 3, 'individualName': "Individual3", 'scientificName': "Arabidopsis thaliana"}] lExpBatchLineResults = [{'IndividualNumber': "1", 'BatchNumber': iCurrentBatchNumber}, {'IndividualNumber': "2", 'BatchNumber': iCurrentBatchNumber}, {'IndividualNumber': "3", 'BatchNumber': 
iCurrentBatchNumber}] lineName2Add = "Individual3" batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) lBatchLineResults = multifasta2SNPFile._completeBatchLineListWithCurrentIndividual(lBatchLineResults, lIndividualResults, lineName2Add) self.assertEquals(lBatchLineResults, lExpBatchLineResults) def test_completeBatchLineListWithCurrentIndividual_no_entries_in_batchline_results_in_input(self): lBatchLineResults = [] lIndividualResults = [{'individualNumber': 1, 'individualName': "Individual1", 'scientificName': "Arabidopsis thaliana"}, {'individualNumber': 2, 'individualName': "Individual2", 'scientificName': "Arabidopsis thaliana"}, {'individualNumber': 3, 'individualName': "Individual3", 'scientificName': "Arabidopsis thaliana"}] lExpBatchLineResults = [{'IndividualNumber': "2", 'BatchNumber': 1}] lineName2Add = "Individual2" batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) lBatchLineResults = multifasta2SNPFile._completeBatchLineListWithCurrentIndividual(lBatchLineResults, lIndividualResults, lineName2Add) self.assertEquals(lBatchLineResults, lExpBatchLineResults) def test_completeBatchLineListWithCurrentIndividual_no_individual_in_individualList(self): lBatchLineResults = [{'IndividualNumber': "1", 'BatchNumber': 1}, {'IndividualNumber': "2", 'BatchNumber': 1}] lIndividualResults = [] lineName2Add = "Individual3" batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) try: lBatchLineResults = multifasta2SNPFile._completeBatchLineListWithCurrentIndividual(lBatchLineResults, lIndividualResults, lineName2Add) except Exception, e : self.assertRaises(Exception, e) def test_completeBatchLineListWithCurrentIndividual_individual_added_has_no_individual_number(self): lBatchLineResults = 
[{'IndividualNumber': "1", 'BatchNumber': "1"}, {'IndividualNumber': "2", 'BatchNumber': "1"}] lIndividualResults = [{'individualNumber': 1, 'individualName': "Individual1", 'scientificName': "Arabidopsis thaliana"}, {'individualNumber': 2, 'individualName': "Individual2", 'scientificName': "Arabidopsis thaliana"}, {'individualName': "Individual3", 'scientificName': "Arabidopsis thaliana"}] lineName2Add = "Individual3" batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) try: lBatchLineResults = multifasta2SNPFile._completeBatchLineListWithCurrentIndividual(lBatchLineResults, lIndividualResults, lineName2Add) except Exception, e : self.assertRaises(Exception, e) def test_completeBatchLineListWithCurrentIndividual_individual_not_present_in_individualList(self): lBatchLineResults = [{'IndividualNumber': "1", 'BatchNumber': "1"}, {'IndividualNumber': "2", 'BatchNumber': "1"}] lIndividualResults = [{'individualNumber': 1, 'individualName': "Individual1", 'scientificName': "Arabidopsis thaliana"}, {'individualNumber': 2, 'individualName': "Individual2", 'scientificName': "Arabidopsis thaliana"}, {'individualNumber': 3, 'individualName': "Individual3", 'scientificName': "Arabidopsis thaliana"}] lineName2Add = "Michael Corleone" batchName = "batch1" taxon = "Arabidopsis thaliana" gene = "methyltransferase" multifasta2SNPFile = Multifasta2SNPFile(taxon, batchName, gene) try: lBatchLineResults = multifasta2SNPFile._completeBatchLineListWithCurrentIndividual(lBatchLineResults, lIndividualResults, lineName2Add) except Exception, e : self.assertRaises(Exception, e) def test_findASubSNPInAListWithHisName(self): lSubSNPList = [{'subSNPName': "SubSNP_batch1_1_line2", 'position': 1, 'lineName': 2, 'allele': 2, 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION"}, {'subSNPName': "SubSNP_batch1_2_line1", 'position': 1, 'lineName': 1, 'allele': 1, 'batchNumber': 1, 'confidenceValue' : 
"A", 'type' : "DELETION"}, {'subSNPName': "SubSNP_batch1_6_line1", 'position': 6, 'lineName': 1, 'allele': 3, 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP"}] name = "SubSNP_batch1_2_line1" dExpSubSNP = {'subSNPName': "SubSNP_batch1_2_line1", 'position': 1, 'lineName': 1, 'allele': 1, 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION"} expIndice = 1 multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse") dObsSubSNP, obsIndice = multifasta2SNPFile.findASubSNPInAListWithHisName(name, lSubSNPList) self.assertEquals(expIndice, obsIndice) self.assertEquals(dExpSubSNP, dObsSubSNP) def test_findASubSNPInAListWithHisName_SubSNP_not_found(self): lSubSNPList = [{'subSNPName': "SubSNP_batch1_1_line2", 'position': 1, 'lineName': 2, 'allele': 2, 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION"}, {'subSNPName': "SubSNP_batch1_2_line1", 'position': 1, 'lineName': 1, 'allele': 1, 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION"}, {'subSNPName': "SubSNP_batch1_6_line1", 'position': 6, 'lineName': 1, 'allele': 3, 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP"}] name = "SubSNP_fake" multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse") try: dObsSubSNP, obsIndice = multifasta2SNPFile.findASubSNPInAListWithHisName(name, lSubSNPList) except Exception, e : self.assertRaises(Exception, e) def test_clusteriseIndels(self): multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse") lObsIndelsList = [{'name' : "indel1", 'start': 1, 'end': 6}, {'name' : "indel2", 'start': 12, 'end': 15}, {'name' : "indel3",'start': 5, 'end': 10}] dIndel = {'start': 1, 'end': 6} lObsIndelsList = multifasta2SNPFile.clusteriseIndels(dIndel, lObsIndelsList) lexpIndelsList = [{'name' : "indel1", 'start': 1, 'end': 10}, {'name' : "indel2", 'start': 12, 'end': 15}, {'name' : "indel3", 'start': 1, 'end': 10}] self.assertEquals(lexpIndelsList, lObsIndelsList) def test_clusteriseIndels_no_overlap(self): multifasta2SNPFile = 
Multifasta2SNPFile("batch1", "gene1", "mouse") lObsIndelsList = [{'name' : "indel1", 'start': 1, 'end': 6}, {'name' : "indel2", 'start': 12, 'end': 15}, {'name' : "indel3",'start': 25, 'end': 30}] dIndel = {'start': 1, 'end': 6} lObsIndelsList = multifasta2SNPFile.clusteriseIndels(dIndel, lObsIndelsList) lexpIndelsList = [{'name' : "indel1", 'start': 1, 'end': 6}, {'name' : "indel2", 'start': 12, 'end': 15}, {'name' : "indel3", 'start': 25, 'end': 30}] self.assertEquals(lexpIndelsList, lObsIndelsList) def test_clusteriseIndels_many_overlaps_complicated(self): multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse") lObsIndelsList = [{'name' : "indel1", 'start': 1, 'end': 6}, {'name' : "indel2", 'start': 12, 'end': 15}, {'name' : "indel3",'start': 5, 'end': 10}, {'name' : "indel4",'start': 9, 'end': 40}] dIndel = {'start': 5, 'end': 10} lObsIndelsList = multifasta2SNPFile.clusteriseIndels(dIndel, lObsIndelsList) lexpIndelsList = [{'name' : "indel1", 'start': 1, 'end': 40}, {'name' : "indel2", 'start': 1, 'end': 40}, {'name' : "indel3", 'start': 1, 'end': 40}, {'name' : "indel4",'start': 1, 'end': 40}] self.assertEquals(lexpIndelsList, lObsIndelsList) def test_updateBoundsForAnIndelInAnIndelList(self): lIndelsList = [{'name' : "indel1", 'start': 1, 'end': 6}, {'name' : "indel2", 'start': 12, 'end': 15}, {'name' : "indel3",'start': 5, 'end': 10}, {'name' : "indel4",'start': 9, 'end': 40}] dIndelWithNewBounds = {'name': "indel2", 'start': 7, 'end': 19} multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse") lObsNewIndelsList = multifasta2SNPFile.updateBoundsForAnIndelInAnIndelList(lIndelsList, dIndelWithNewBounds) lExpNewIndelsList = [{'name' : "indel1", 'start': 1, 'end': 6}, {'name' : "indel2", 'start': 7, 'end': 19}, {'name' : "indel3",'start': 5, 'end': 10}, {'name' : "indel4",'start': 9, 'end': 40}] self.assertEquals(lExpNewIndelsList, lObsNewIndelsList) def test_updateBoundsForAnIndelInAnIndelList_no_update_to_do(self): lIndelsList = 
[{'name' : "indel1", 'start': 1, 'end': 6}, {'name' : "indel2", 'start': 12, 'end': 15}, {'name' : "indel3",'start': 5, 'end': 10}, {'name' : "indel4",'start': 9, 'end': 40}] dIndelWithNewBounds = {'name': "indel2", 'start': 12, 'end': 15} multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse") lObsNewIndelsList = multifasta2SNPFile.updateBoundsForAnIndelInAnIndelList(lIndelsList, dIndelWithNewBounds) lExpNewIndelsList = [{'name' : "indel1", 'start': 1, 'end': 6}, {'name' : "indel2", 'start': 12, 'end': 15}, {'name' : "indel3",'start': 5, 'end': 10}, {'name' : "indel4",'start': 9, 'end': 40}] self.assertEquals(lExpNewIndelsList, lObsNewIndelsList) def test_updateBoundsForAnIndelInAnIndelList_indel_2_update_does_not_exist(self): lIndelsList = [{'name' : "indel1", 'start': 1, 'end': 6}, {'name' : "indel2", 'start': 12, 'end': 15}, {'name' : "indel3",'start': 5, 'end': 10}, {'name' : "indel4",'start': 9, 'end': 40}] dIndelWithNewBounds = {'name': "DeNiro", 'start': 12, 'end': 15} multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse") try: lObsNewIndelsList = multifasta2SNPFile.updateBoundsForAnIndelInAnIndelList(lIndelsList, dIndelWithNewBounds) except Exception, e : self.assertRaises(Exception, e) def test_mergeBoundsFor2Indels(self): multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse") dIndel1 = {'start': 1, 'end': 4} dIndel2 = {'start': 2, 'end': 15} dIndel1, dIndel2 = multifasta2SNPFile.mergeBoundsForTwoOverlappingIndels(dIndel1, dIndel2) dExpIndel1 = {'start': 1, 'end': 15} dExpIndel2 = {'start': 1, 'end': 15} self.assertEquals(dExpIndel1, dIndel1) self.assertEquals(dExpIndel2, dIndel2) def test_mergeBoundsFor2Indels_no_overlap(self): multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse") dIndel1 = {'start': 1, 'end': 4} dIndel2 = {'start': 5, 'end': 15} dIndel1, dIndel2 = multifasta2SNPFile.mergeBoundsForTwoOverlappingIndels(dIndel1, dIndel2) dExpIndel1 = {'start': 1, 'end': 4} dExpIndel2 = {'start': 5, 'end': 
15} self.assertEquals(dExpIndel1, dIndel1) self.assertEquals(dExpIndel2, dIndel2) def test_getUngappedPositionInRefSeq(self): multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse") refBioseq = Bioseq() alignedBioseqDB = BioseqDB() refBioseq.sequence = "A--TTACC-GAA" refBioseq.header = "reference" bs1 = Bioseq( "line1", "AACTTTCCAGAA" ) bs2 = Bioseq( "line2", "AACTTACC-GAA" ) alignedBioseqDB.setData( [ bs1, bs2 ] ) multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName) expUngappedPositionFor1 = 1 obsUngappedPositionFor1 = multifasta2SNPFile.getUngappedPositionInRefSeq(1) expUngappedPositionFor5 = 3 obsUngappedPositionFor5 = multifasta2SNPFile.getUngappedPositionInRefSeq(5) expUngappedPositionFor10 = 7 obsUngappedPositionFor10 = multifasta2SNPFile.getUngappedPositionInRefSeq(10) self.assertEquals(expUngappedPositionFor1, obsUngappedPositionFor1) self.assertEquals(expUngappedPositionFor5, obsUngappedPositionFor5) self.assertEquals(expUngappedPositionFor10, obsUngappedPositionFor10) def test_getUngappedPositionInRefSeq_no_gap(self): multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse") refBioseq = Bioseq() alignedBioseqDB = BioseqDB() refBioseq.sequence = "AACTTACCAGAA" refBioseq.header = "reference" bs1 = Bioseq( "line1", "AACTTTCCAGAA" ) bs2 = Bioseq( "line2", "AACTTACC-GAA" ) alignedBioseqDB.setData( [ bs1, bs2 ] ) multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName) expUngappedPositionFor1 = 1 obsUngappedPositionFor1 = multifasta2SNPFile.getUngappedPositionInRefSeq(1) expUngappedPositionFor5 = 5 obsUngappedPositionFor5 = multifasta2SNPFile.getUngappedPositionInRefSeq(5) expUngappedPositionFor10 = 10 obsUngappedPositionFor10 = multifasta2SNPFile.getUngappedPositionInRefSeq(10) self.assertEquals(expUngappedPositionFor1, obsUngappedPositionFor1) 
self.assertEquals(expUngappedPositionFor5, obsUngappedPositionFor5) self.assertEquals(expUngappedPositionFor10, obsUngappedPositionFor10) def test_checkAllSeq_sequences_with_different_sizes_one_seq_longer(self): multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse") refBioseq = Bioseq() alignedBioseqDB = BioseqDB() refBioseq.sequence = "AACTTACCAGAA" refBioseq.header = "reference" bs1 = Bioseq( "line1", "AACTTTCCAGAA" ) bs2 = Bioseq( "line2", "AACTTACC-GAATTTC" ) alignedBioseqDB.setData( [ bs1, bs2 ] ) try: multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName) except Exception, e : self.assertRaises(Exception, e) obsMsg = e.message expMsg = "File: " + self._inFileName + ", problem with the sequence " + bs2.header + ": its length is different from the reference seq! All the sequences must have the same length.\n" expMsg += "refseq length: " + str(len(refBioseq.sequence)) + "\n" expMsg += "seq length: " + str(len(bs2.sequence)) + "\n" self.assertEquals(expMsg, obsMsg) def test_checkAllSeq_sequences_with_different_sizes_one_seq_shorter(self): multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse") refBioseq = Bioseq() alignedBioseqDB = BioseqDB() refBioseq.sequence = "AACTTACCAGAA" refBioseq.header = "reference" bs1 = Bioseq( "line1", "AACTTTCCAGAA" ) bs2 = Bioseq( "line2", "AACTTACC" ) alignedBioseqDB.setData( [ bs1, bs2 ] ) try: multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName) except Exception, e : self.assertRaises(Exception, e) obsMsg = e.message expMsg = "File: " + self._inFileName + ", problem with the sequence " + bs2.header + ": its length is different from the reference seq! 
All the sequences must have the same length.\n" expMsg += "refseq length: " + str(len(refBioseq.sequence)) + "\n" expMsg += "seq length: " + str(len(bs2.sequence)) + "\n" self.assertEquals(expMsg, obsMsg) def test_getFlanksOfASubSNP(self): refBioseq = Bioseq() alignedBioseqDB = BioseqDB() refBioseq.sequence = "AACTTACCAGAA" refBioseq.header = "reference" bs1 = Bioseq( "line1", "AACTTTCCAGAA" ) bs2 = Bioseq( "line2", "AACTTACC-GAA" ) alignedBioseqDB.setData( [ bs1, bs2 ] ) multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse") multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName) subsnpPosition = 3 polymLength = 3 lineName = "line1" exp5flank = "AA" exp3flank = "TCCAGAA" obs5flank, obs3flank = multifasta2SNPFile.getFlanksOfASubSNP(lineName, subsnpPosition, polymLength, 7) self.assertEquals(exp5flank, obs5flank) self.assertEquals(exp3flank, obs3flank) def test_getFlanksOfASubSNP_flank_truncated(self): refBioseq = Bioseq() alignedBioseqDB = BioseqDB() refBioseq.sequence = "AACTTACCAGAA" refBioseq.header = "reference" bs1 = Bioseq( "line1", "AACTTTCCAGAA" ) bs2 = Bioseq( "line2", "AACTTACC-GAA" ) alignedBioseqDB.setData( [ bs1, bs2 ] ) multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse") multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName) subsnpPosition = 3 polymLength = 3 lineName = "line1" exp5flank = "AA" exp3flank = "TCCAGAA" obs5flank, obs3flank = multifasta2SNPFile.getFlanksOfASubSNP(lineName, subsnpPosition, polymLength, 500) self.assertEquals(exp5flank, obs5flank) self.assertEquals(exp3flank, obs3flank) def test_getFlanksOfASubSNP_empty_seq(self): refBioseq = Bioseq() alignedBioseqDB = BioseqDB() refBioseq.sequence = "" refBioseq.header = "reference" bs1 = Bioseq( "line1", "" ) bs2 = Bioseq( "line2", "" ) alignedBioseqDB.setData( [ bs1, bs2 ] ) 
multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse") multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName) subsnpPosition = 3 polymLength = 3 lineName = "line1" exp5flank = "" exp3flank = "" obs5flank, obs3flank = multifasta2SNPFile.getFlanksOfASubSNP(lineName, subsnpPosition, polymLength, 500) self.assertEquals(exp5flank, obs5flank) self.assertEquals(exp3flank, obs3flank) def test_getFlanksOfASubSNP_flank_of_first_base(self): refBioseq = Bioseq() alignedBioseqDB = BioseqDB() refBioseq.sequence = "AACTTACCAGAA" refBioseq.header = "reference" bs1 = Bioseq( "line1", "AACTTTCCAGAA" ) bs2 = Bioseq( "line2", "AACTTACC-GAA" ) alignedBioseqDB.setData( [ bs1, bs2 ] ) multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse") multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName) subsnpPosition = 1 polymLength = 1 lineName = "line1" exp5flank = "" exp3flank = "ACTTTCCAGAA" obs5flank, obs3flank = multifasta2SNPFile.getFlanksOfASubSNP(lineName, subsnpPosition, polymLength, 500) self.assertEquals(exp5flank, obs5flank) self.assertEquals(exp3flank, obs3flank) def test_getFlanksOfASubSNP_flank_of_first_base_with_polym_on_all_sequence(self): refBioseq = Bioseq() alignedBioseqDB = BioseqDB() refBioseq.sequence = "AACTTACCAGAA" refBioseq.header = "reference" bs1 = Bioseq( "line1", "AACTTTCCAGAA" ) bs2 = Bioseq( "line2", "AACTTACC-GAA" ) alignedBioseqDB.setData( [ bs1, bs2 ] ) multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse") multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName) subsnpPosition = 1 polymLength = 12 lineName = "line1" exp5flank = "" exp3flank = "" obs5flank, obs3flank = multifasta2SNPFile.getFlanksOfASubSNP(lineName, subsnpPosition, polymLength, 500) 
self.assertEquals(exp5flank, obs5flank) self.assertEquals(exp3flank, obs3flank) def test_getFlanksOfASubSNP_flank_of_last_base_with_polym_on_all_sequence(self): refBioseq = Bioseq() alignedBioseqDB = BioseqDB() refBioseq.sequence = "AACTTACCAGAA" refBioseq.header = "reference" bs1 = Bioseq( "line1", "AACTTTCCAGAA" ) bs2 = Bioseq( "line2", "AACTTACC-GAA" ) alignedBioseqDB.setData( [ bs1, bs2 ] ) multifasta2SNPFile = Multifasta2SNPFile("batch1", "gene1", "mouse") multifasta2SNPFile._wrapper = ReferenceBioseqAndLinesBioseqDBWrapper(refBioseq, alignedBioseqDB, multifasta2SNPFile._logFile, self._inFileName) subsnpPosition = 12 polymLength = 1 lineName = "line1" exp5flank = "AACTTTCCAGA" exp3flank = "" obs5flank, obs3flank = multifasta2SNPFile.getFlanksOfASubSNP(lineName, subsnpPosition, polymLength, 500) self.assertEquals(exp5flank, obs5flank) self.assertEquals(exp3flank, obs3flank) # def test_subSNPExistsInSubSNPList_subSNP_exists(self): batchName = "batch1" lSubSNP = [{'subSNPName': batchName + "_DEL_1_line2", 'position': 1, 'lineName': 2, 'allele': 3, '5flank': "", '3flank': "CCGAA", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4}, {'subSNPName': batchName + "_DEL_1_line1", 'position': 1, 'lineName': 1, 'allele': 2, '5flank': "", '3flank': "CCGAA",'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4}, {'subSNPName': batchName + "_SNP_8_line3", 'position': 8, 'lineName': 3, 'allele': 1, '5flank': "ATTACCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_SNP_8_line1", 'position': 8, 'lineName': 1, 'allele': 6, '5flank': "A--ACCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_SNP_8_line2", 'position': 8, 'lineName': 2, 'allele': 6, '5flank': "---ACCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + 
"_SNP_8_line4", 'position': 8, 'lineName': 4, 'allele': 6, '5flank': "----CCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_DEL_1_line4", 'position': 1, 'lineName': 4, 'allele': 4, '5flank': "", '3flank': "CCGAA",'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4}, {'subSNPName': batchName + "_DEL_1_line3", 'position': 1, 'lineName': 3, 'allele': 5, '5flank': "", '3flank': "CCGGA",'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4}] multifasta2SNPFile = Multifasta2SNPFile(batchName, "gene1", "mouse") dSearchedSubSNP = {'subSNPName': batchName + "_DEL_1_line1", 'position': 1, 'lineName': 1, 'allele': 2, '5flank': "", '3flank': "CCGAA",'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4} expResult = multifasta2SNPFile.subSNPExistsInSubSNPList(dSearchedSubSNP, lSubSNP) obsResult = True self.assertEquals(expResult, obsResult) def test_subSNPExistsInSubSNPList_subSNP_does_not_exist(self): batchName = "batch1" lSubSNP = [{'subSNPName': batchName + "_DEL_1_line2", 'position': 1, 'lineName': 2, 'allele': 3, '5flank': "", '3flank': "CCGAA", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4}, {'subSNPName': batchName + "_DEL_1_line1", 'position': 1, 'lineName': 1, 'allele': 2, '5flank': "", '3flank': "CCGAA",'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4}, {'subSNPName': batchName + "_SNP_8_line3", 'position': 8, 'lineName': 3, 'allele': 1, '5flank': "ATTACCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_SNP_8_line1", 'position': 8, 'lineName': 1, 'allele': 6, '5flank': "A--ACCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_SNP_8_line2", 'position': 8, 'lineName': 2, 'allele': 6, '5flank': "---ACCG", '3flank': "A", 'batchNumber': 1, 
'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_SNP_8_line4", 'position': 8, 'lineName': 4, 'allele': 6, '5flank': "----CCG", '3flank': "A", 'batchNumber': 1, 'confidenceValue' : "A", 'type' : "SNP", 'length': 1}, {'subSNPName': batchName + "_DEL_1_line4", 'position': 1, 'lineName': 4, 'allele': 4, '5flank': "", '3flank': "CCGAA",'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4}, {'subSNPName': batchName + "_DEL_1_line3", 'position': 1, 'lineName': 3, 'allele': 5, '5flank': "", '3flank': "CCGGA",'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4}] multifasta2SNPFile = Multifasta2SNPFile(batchName, "gene1", "mouse") dSearchedSubSNP = {'subSNPName': batchName + "_DEL_12_line1", 'position': 12, 'lineName': 1, 'allele': 2, '5flank': "", '3flank': "CCGAA",'batchNumber': 1, 'confidenceValue' : "A", 'type' : "DELETION", 'length': 4} expResult = multifasta2SNPFile.subSNPExistsInSubSNPList(dSearchedSubSNP, lSubSNP) obsResult = False self.assertEquals(expResult, obsResult) def _writeExpSubSNPFile(self): expFileHandle = open(self._expSubSNPFileName, "w") expFileHandle.write("SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n") expFileHandle.write("Batch1_SNP_4_Line1;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;1\n") expFileHandle.write("Batch1_SNP_4_Line2;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n") expFileHandle.write("Batch1_SNP_21_Line1;A;SNP;21;CCTTAGCCATTGCTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;2\n") expFileHandle.write("Batch1_SNP_21_Line2;A;SNP;21;CCTAAGCCATTGCTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n") expFileHandle.write("Batch1_SNP_25_Line1;A;SNP;25;CCTTAGCCATTGCTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;3\n") 
def _writeExpSubSNPFile(self):
    """Write the SubSNP table (header + 10 SNP rows of batch ``Batch1``) to ``self._expSubSNPFileName``."""
    lines = (
        "SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n",
        "Batch1_SNP_4_Line1;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;1\n",
        "Batch1_SNP_4_Line2;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n",
        "Batch1_SNP_21_Line1;A;SNP;21;CCTTAGCCATTGCTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;2\n",
        "Batch1_SNP_21_Line2;A;SNP;21;CCTAAGCCATTGCTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_25_Line1;A;SNP;25;CCTTAGCCATTGCTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch1_SNP_25_Line2;A;SNP;25;CCTAAGCCATTGCTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_36_Line1;A;SNP;36;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch1_SNP_36_Line2;A;SNP;36;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_51_Line1;A;SNP;51;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;1;1;Sequence;;;2\n",
        "Batch1_SNP_51_Line2;A;SNP;51;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;1;2;Sequence;;;4\n",
    )
    # The context manager closes the file even if a write fails.
    with open(self._expSubSNPFileName, "w") as expFileHandle:
        expFileHandle.writelines(lines)


def _writeExpSubSNPFileWithSnpsAndIndels(self):
    """Write the SubSNP table (header + 14 rows mixing INSERTION, SNP and DELETION) to ``self._expSubSNPFileName``."""
    lines = (
        "SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n",
        "Batch1_INS_1_Line1;A;INSERTION;1;C;TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;2;1;1;Sequence;;;8\n",
        "Batch1_INS_1_Line2;A;INSERTION;1;C;AAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;2;1;2;Sequence;;;6\n",
        "Batch1_SNP_2_Line1;A;SNP;2;C--;AGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;1\n",
        "Batch1_SNP_2_Line2;A;SNP;2;CCT;AGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n",
        "Batch1_DEL_8_Line1;A;DELETION;8;C--TAGCCA;CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;3;1;1;Sequence;;;5\n",
        "Batch1_DEL_8_Line2;A;DELETION;8;CCTAAGCCA;CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;3;1;2;Sequence;;;7\n",
        "Batch1_SNP_19_Line1;A;SNP;19;C--TAGCCA---CTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;2\n",
        "Batch1_SNP_19_Line2;A;SNP;19;CCTAAGCCATT-CTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_23_Line1;A;SNP;23;C--TAGCCA---CTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch1_SNP_23_Line2;A;SNP;23;CCTAAGCCATT-CTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_34_Line1;A;SNP;34;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch1_SNP_34_Line2;A;SNP;34;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_49_Line1;A;SNP;49;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;1;1;Sequence;;;2\n",
        "Batch1_SNP_49_Line2;A;SNP;49;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;1;2;Sequence;;;4\n",
    )
    # The context manager closes the file even if a write fails.
    with open(self._expSubSNPFileName, "w") as expFileHandle:
        expFileHandle.writelines(lines)


def _writeExpSubSNPFileSeveralBatches(self):
    """Write the SubSNP table for two batches to ``self._expSubSNPFileName``.

    Header, then 10 ``Batch_Gene1`` rows (batch number 1) followed by
    14 ``Batch_Gene2`` rows (batch number 2); both batches use Line1/Line2.
    """
    lines = (
        "SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n",
        "Batch_Gene1_SNP_4_Line1;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;1\n",
        "Batch_Gene1_SNP_4_Line2;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n",
        "Batch_Gene1_SNP_21_Line1;A;SNP;21;CCTTAGCCATTGCTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;2\n",
        "Batch_Gene1_SNP_21_Line2;A;SNP;21;CCTAAGCCATTGCTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch_Gene1_SNP_25_Line1;A;SNP;25;CCTTAGCCATTGCTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch_Gene1_SNP_25_Line2;A;SNP;25;CCTAAGCCATTGCTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch_Gene1_SNP_36_Line1;A;SNP;36;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch_Gene1_SNP_36_Line2;A;SNP;36;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch_Gene1_SNP_51_Line1;A;SNP;51;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;1;1;Sequence;;;2\n",
        "Batch_Gene1_SNP_51_Line2;A;SNP;51;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;1;2;Sequence;;;4\n",
        "Batch_Gene2_INS_1_Line1;A;INSERTION;1;C;TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;2;2;1;Sequence;;;8\n",
        "Batch_Gene2_INS_1_Line2;A;INSERTION;1;C;AAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;2;2;2;Sequence;;;6\n",
        "Batch_Gene2_SNP_2_Line1;A;SNP;2;C--;AGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;2;1;Sequence;;;1\n",
        "Batch_Gene2_SNP_2_Line2;A;SNP;2;CCT;AGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;2;2;Sequence;;;4\n",
        "Batch_Gene2_DEL_8_Line1;A;DELETION;8;C--TAGCCA;CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;3;2;1;Sequence;;;5\n",
        "Batch_Gene2_DEL_8_Line2;A;DELETION;8;CCTAAGCCA;CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;3;2;2;Sequence;;;7\n",
        "Batch_Gene2_SNP_19_Line1;A;SNP;19;C--TAGCCA---CTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;2;1;Sequence;;;2\n",
        "Batch_Gene2_SNP_19_Line2;A;SNP;19;CCTAAGCCATT-CTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;2;2;Sequence;;;2\n",
        "Batch_Gene2_SNP_23_Line1;A;SNP;23;C--TAGCCA---CTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;2;1;Sequence;;;3\n",
        "Batch_Gene2_SNP_23_Line2;A;SNP;23;CCTAAGCCATT-CTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;2;2;Sequence;;;2\n",
        "Batch_Gene2_SNP_34_Line1;A;SNP;34;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;2;1;Sequence;;;3\n",
        "Batch_Gene2_SNP_34_Line2;A;SNP;34;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;2;2;Sequence;;;2\n",
        "Batch_Gene2_SNP_49_Line1;A;SNP;49;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;2;1;Sequence;;;2\n",
        "Batch_Gene2_SNP_49_Line2;A;SNP;49;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;2;2;Sequence;;;4\n",
    )
    # The context manager closes the file even if a write fails.
    with open(self._expSubSNPFileName, "w") as expFileHandle:
        expFileHandle.writelines(lines)


def _writeExpSubSNPFileSeveralBatches_different_lines_between_files(self):
    """Write the two-batch SubSNP table with distinct lines per batch to ``self._expSubSNPFileName``.

    Header, then 10 ``Batch_Gene1`` rows on Line1/Line2 (individuals 1 and 2)
    followed by 14 ``Batch_Gene2`` rows on Line3/Line4 (individuals 3 and 4).
    """
    lines = (
        "SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n",
        "Batch_Gene1_SNP_4_Line1;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;1\n",
        "Batch_Gene1_SNP_4_Line2;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n",
        "Batch_Gene1_SNP_21_Line1;A;SNP;21;CCTTAGCCATTGCTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;2\n",
        "Batch_Gene1_SNP_21_Line2;A;SNP;21;CCTAAGCCATTGCTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch_Gene1_SNP_25_Line1;A;SNP;25;CCTTAGCCATTGCTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch_Gene1_SNP_25_Line2;A;SNP;25;CCTAAGCCATTGCTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch_Gene1_SNP_36_Line1;A;SNP;36;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;1;1;Sequence;;;3\n",
        "Batch_Gene1_SNP_36_Line2;A;SNP;36;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch_Gene1_SNP_51_Line1;A;SNP;51;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;1;1;Sequence;;;2\n",
        "Batch_Gene1_SNP_51_Line2;A;SNP;51;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;1;2;Sequence;;;4\n",
        "Batch_Gene2_INS_1_Line3;A;INSERTION;1;C;TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;2;2;3;Sequence;;;8\n",
        "Batch_Gene2_INS_1_Line4;A;INSERTION;1;C;AAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;2;2;4;Sequence;;;6\n",
        "Batch_Gene2_SNP_2_Line3;A;SNP;2;C--;AGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;1;2;3;Sequence;;;1\n",
        "Batch_Gene2_SNP_2_Line4;A;SNP;2;CCT;AGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;1;2;4;Sequence;;;4\n",
        "Batch_Gene2_DEL_8_Line3;A;DELETION;8;C--TAGCCA;CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC;3;2;3;Sequence;;;5\n",
        "Batch_Gene2_DEL_8_Line4;A;DELETION;8;CCTAAGCCA;CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA;3;2;4;Sequence;;;7\n",
        "Batch_Gene2_SNP_19_Line3;A;SNP;19;C--TAGCCA---CTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATC;1;2;3;Sequence;;;2\n",
        "Batch_Gene2_SNP_19_Line4;A;SNP;19;CCTAAGCCATT-CTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATA;1;2;4;Sequence;;;2\n",
        "Batch_Gene2_SNP_23_Line3;A;SNP;23;C--TAGCCA---CTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATC;1;2;3;Sequence;;;3\n",
        "Batch_Gene2_SNP_23_Line4;A;SNP;23;CCTAAGCCATT-CTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATA;1;2;4;Sequence;;;2\n",
        "Batch_Gene2_SNP_34_Line3;A;SNP;34;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATC;1;2;3;Sequence;;;3\n",
        "Batch_Gene2_SNP_34_Line4;A;SNP;34;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATA;1;2;4;Sequence;;;2\n",
        "Batch_Gene2_SNP_49_Line3;A;SNP;49;C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;;1;2;3;Sequence;;;2\n",
        "Batch_Gene2_SNP_49_Line4;A;SNP;49;CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;;1;2;4;Sequence;;;4\n",
    )
    # The context manager closes the file even if a write fails.
    with open(self._expSubSNPFileName, "w") as expFileHandle:
        expFileHandle.writelines(lines)


def _writeExpSubSNPFileSeveralLineSeq(self):
    """Write the SubSNP table (header + 10 SNP rows with the longer 3' flanks) to ``self._expSubSNPFileName``."""
    lines = (
        "SubSNPName;ConfidenceValue;Type;Position;5flank;3flank;Length;BatchNumber;IndividualNumber;PrimerType;PrimerNumber;Forward_or_Reverse;AlleleNumber\n",
        "Batch1_SNP_4_Line1;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA;1;1;1;Sequence;;;1\n",
        "Batch1_SNP_4_Line2;A;SNP;4;CCT;AGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATACGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n",
        "Batch1_SNP_21_Line1;A;SNP;21;CCTTAGCCATTGCTTGGTGA;TATGAAGGCAGTAGGCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA;1;1;1;Sequence;;;2\n",
        "Batch1_SNP_21_Line2;A;SNP;21;CCTAAGCCATTGCTTGGTGA;TATCAAGGCAGTAGCCAAACCTCCACAATACGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_25_Line1;A;SNP;25;CCTTAGCCATTGCTTGGTGACTAT;AAGGCAGTAGGCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA;1;1;1;Sequence;;;3\n",
        "Batch1_SNP_25_Line2;A;SNP;25;CCTAAGCCATTGCTTGGTGACTAT;AAGGCAGTAGCCAAACCTCCACAATACGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_36_Line1;A;SNP;36;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAG;CAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA;1;1;1;Sequence;;;3\n",
        "Batch1_SNP_36_Line2;A;SNP;36;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAG;CAAACCTCCACAATACGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;2\n",
        "Batch1_SNP_51_Line1;A;SNP;51;CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAAT;CGCAGTAGCCAAACCTCCACAATA;1;1;1;Sequence;;;2\n",
        "Batch1_SNP_51_Line2;A;SNP;51;CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAAT;CGCAGTAGCCAAACCTCCACAATA;1;1;2;Sequence;;;4\n",
    )
    # The context manager closes the file even if a write fails.
    with open(self._expSubSNPFileName, "w") as expFileHandle:
        expFileHandle.writelines(lines)


def _writeExpAlleleFile(self):
    """Write the allele table (header + alleles 1-4: T, C, G, A) to ``self._expAlleleFileName``."""
    lines = (
        "AlleleNumber;Value;Motif;NbCopy;Comment\n",
        "1;T;;;\n",
        "2;C;;;\n",
        "3;G;;;\n",
        "4;A;;;\n",
    )
    # The context manager closes the file even if a write fails.
    with open(self._expAlleleFileName, "w") as expFileHandle:
        expFileHandle.writelines(lines)
def _writeExpAlleleFileWithSnpsAndIndels(self):
    """Write the allele table holding alleles 1-8 (bases and indel motifs) to ``self._expAlleleFileName``."""
    lines = (
        "AlleleNumber;Value;Motif;NbCopy;Comment\n",
        "1;T;;;\n",
        "2;C;;;\n",
        "3;G;;;\n",
        "4;A;;;\n",
        "5;---;;;\n",
        "6;CT;;;\n",
        "7;TT-;;;\n",
        "8;--;;;\n",
    )
    # The context manager closes the file even if a write fails.
    with open(self._expAlleleFileName, "w") as expFileHandle:
        expFileHandle.writelines(lines)


def _writeExpAlleleFileSeveralBatches(self):
    """Write the allele table for the several-batches case (same 8 alleles) to ``self._expAlleleFileName``."""
    lines = (
        "AlleleNumber;Value;Motif;NbCopy;Comment\n",
        "1;T;;;\n",
        "2;C;;;\n",
        "3;G;;;\n",
        "4;A;;;\n",
        "5;---;;;\n",
        "6;CT;;;\n",
        "7;TT-;;;\n",
        "8;--;;;\n",
    )
    with open(self._expAlleleFileName, "w") as expFileHandle:
        expFileHandle.writelines(lines)


def _writeExpIndividualFile(self):
    """Write the individual table (header + Line1, Line2) to ``self._expIndividualFileName``."""
    lines = (
        "IndividualNumber;IndividualName;Description;AberrAneuploide;FractionLength;DeletionLineSynthesis;UrlEarImage;TypeLine;ChromNumber;ArmChrom;DeletionBin;ScientificName;local_germplasm_name;submitter_code;local_institute;donor_institute;donor_acc_id\n",
        "1;Line1;;;;;;;;;;Arabidopsis thaliana;;;;;\n",
        "2;Line2;;;;;;;;;;Arabidopsis thaliana;;;;;\n",
    )
    with open(self._expIndividualFileName, "w") as expFileHandle:
        expFileHandle.writelines(lines)


def _writeExpIndividualFile_different_lines_between_files(self):
    """Write the individual table (header + Line1 to Line4) to ``self._expIndividualFileName``."""
    lines = (
        "IndividualNumber;IndividualName;Description;AberrAneuploide;FractionLength;DeletionLineSynthesis;UrlEarImage;TypeLine;ChromNumber;ArmChrom;DeletionBin;ScientificName;local_germplasm_name;submitter_code;local_institute;donor_institute;donor_acc_id\n",
        "1;Line1;;;;;;;;;;Arabidopsis thaliana;;;;;\n",
        "2;Line2;;;;;;;;;;Arabidopsis thaliana;;;;;\n",
        "3;Line3;;;;;;;;;;Arabidopsis thaliana;;;;;\n",
        "4;Line4;;;;;;;;;;Arabidopsis thaliana;;;;;\n",
    )
    with open(self._expIndividualFileName, "w") as expFileHandle:
        expFileHandle.writelines(lines)


def _writeExpSequenceFile(self):
    """Write the reference sequence as FASTA and its CSV description.

    Targets are ``self._expSequenceFSAFileName`` and
    ``self._expSequenceCSVFileName``.
    """
    # Both handles are now closed on exit; the original never closed them.
    with open(self._expSequenceFSAFileName, "w") as SequenceFSAFileHandle:
        SequenceFSAFileHandle.writelines((
            ">Sequence_de_Reference\n",
            "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ))
    with open(self._expSequenceCSVFileName, "w") as SequenceCSVFileHandle:
        SequenceCSVFileHandle.writelines((
            "SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n",
            "Sequence_de_Reference;Reference;;;;;Arabidopsis thaliana\n",
        ))


def _writeExpSequenceFileSeveralLineSeq(self):
    """Write the 75-base reference sequence as FASTA and its CSV description.

    Targets are ``self._expSequenceFSAFileName`` and
    ``self._expSequenceCSVFileName``.
    """
    # Both handles are now closed on exit; the original never closed them.
    with open(self._expSequenceFSAFileName, "w") as SequenceFSAFileHandle:
        SequenceFSAFileHandle.writelines((
            ">Sequence_de_Reference\n",
            "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA\n",
        ))
    with open(self._expSequenceCSVFileName, "w") as SequenceCSVFileHandle:
        SequenceCSVFileHandle.writelines((
            "SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n",
            "Sequence_de_Reference;Reference;;;;;Arabidopsis thaliana\n",
        ))


def _writeExpSequenceFileWithDeletion(self):
    """Write the gap-free reference sequence (49 bases) as FASTA and its CSV description.

    Targets are ``self._expSequenceFSAFileName`` and
    ``self._expSequenceCSVFileName``.
    """
    # Both handles are now closed on exit; the original never closed them.
    with open(self._expSequenceFSAFileName, "w") as SequenceFSAFileHandle:
        SequenceFSAFileHandle.writelines((
            ">Sequence_de_Reference\n",
            "CAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ))
    with open(self._expSequenceCSVFileName, "w") as SequenceCSVFileHandle:
        SequenceCSVFileHandle.writelines((
            "SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n",
            "Sequence_de_Reference;Reference;;;;;Arabidopsis thaliana\n",
        ))


def _writeExpSequenceSeveralBatches(self):
    """Write two reference sequences (``Sequence_de_Reference1`` and ``2``) as FASTA plus CSV.

    Targets are ``self._expSequenceFSAFileName`` and
    ``self._expSequenceCSVFileName``.
    """
    # Both handles are now closed on exit; the original never closed them.
    with open(self._expSequenceFSAFileName, "w") as SequenceFSAFileHandle:
        SequenceFSAFileHandle.writelines((
            ">Sequence_de_Reference1\n",
            "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
            ">Sequence_de_Reference2\n",
            "CAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ))
    with open(self._expSequenceCSVFileName, "w") as SequenceCSVFileHandle:
        SequenceCSVFileHandle.writelines((
            "SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n",
            "Sequence_de_Reference1;Reference;;;;;Arabidopsis thaliana\n",
            "Sequence_de_Reference2;Reference;;;;;Arabidopsis thaliana\n",
        ))


def _writeExpSequenceSeveralBatchesForSameRefSeq(self):
    """Write two FASTA/CSV records that both carry the name ``Sequence_de_Reference1``.

    Targets are ``self._expSequenceFSAFileName`` and
    ``self._expSequenceCSVFileName``.
    """
    # Both handles are now closed on exit; the original never closed them.
    with open(self._expSequenceFSAFileName, "w") as SequenceFSAFileHandle:
        SequenceFSAFileHandle.writelines((
            ">Sequence_de_Reference1\n",
            "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
            ">Sequence_de_Reference1\n",
            "CAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ))
    with open(self._expSequenceCSVFileName, "w") as SequenceCSVFileHandle:
        SequenceCSVFileHandle.writelines((
            "SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n",
            "Sequence_de_Reference1;Reference;;;;;Arabidopsis thaliana\n",
            "Sequence_de_Reference1;Reference;;;;;Arabidopsis thaliana\n",
        ))


def _writeExpBatchFile(self):
    """Write the single batch record (``Batch1``) to ``self._expBatchFileName``."""
    lines = (
        "BatchNumber: 1\n",
        "BatchName: Batch1\n",
        "GeneName: methyltransferase\n",
        "Description: \n",
        "ContactNumber: 1\n",
        "ProtocolNumber: 1\n",
        "ThematicNumber: 1\n",
        "RefSeqName: Sequence_de_Reference\n",
        "AlignmentFileName: \n",
        "SeqName: \n",
        "//\n",
    )
    with open(self._expBatchFileName, "w") as BatchFileHandle:
        BatchFileHandle.writelines(lines)


def _writeExpBatchFileSeveralBatches(self):
    """Write two batch records, each with its own reference sequence, to ``self._expBatchFileName``."""
    lines = (
        "BatchNumber: 1\n",
        "BatchName: Batch_Gene1\n",
        "GeneName: Gene1\n",
        "Description: \n",
        "ContactNumber: 1\n",
        "ProtocolNumber: 1\n",
        "ThematicNumber: 1\n",
        "RefSeqName: Sequence_de_Reference1\n",
        "AlignmentFileName: \n",
        "SeqName: \n",
        "//\n",
        "BatchNumber: 2\n",
        "BatchName: Batch_Gene2\n",
        "GeneName: Gene2\n",
        "Description: \n",
        "ContactNumber: 1\n",
        "ProtocolNumber: 1\n",
        "ThematicNumber: 1\n",
        "RefSeqName: Sequence_de_Reference2\n",
        "AlignmentFileName: \n",
        "SeqName: \n",
        "//\n",
    )
    with open(self._expBatchFileName, "w") as BatchFileHandle:
        BatchFileHandle.writelines(lines)


def _writeExpBatchFileSeveralBatchesForSameRefSeq(self):
    """Write two batch records that share ``Sequence_de_Reference1`` to ``self._expBatchFileName``."""
    lines = (
        "BatchNumber: 1\n",
        "BatchName: Batch_Gene1\n",
        "GeneName: Gene1\n",
        "Description: \n",
        "ContactNumber: 1\n",
        "ProtocolNumber: 1\n",
        "ThematicNumber: 1\n",
        "RefSeqName: Sequence_de_Reference1\n",
        "AlignmentFileName: \n",
        "SeqName: \n",
        "//\n",
        "BatchNumber: 2\n",
        "BatchName: Batch_Gene2\n",
        "GeneName: Gene2\n",
        "Description: \n",
        "ContactNumber: 1\n",
        "ProtocolNumber: 1\n",
        "ThematicNumber: 1\n",
        "RefSeqName: Sequence_de_Reference1\n",
        "AlignmentFileName: \n",
        "SeqName: \n",
        "//\n",
    )
    # A single managed close replaces the original's duplicated close() call.
    with open(self._expBatchFileName, "w") as BatchFileHandle:
        BatchFileHandle.writelines(lines)


def _writeExpBatchLineFile(self):
    """Write the batch/line table (individuals 1 and 2 in batch 1) to ``self._expBatchLineFileName``."""
    lines = (
        "IndividualNumber;Pos5;Pos3;BatchNumber;Sequence\n",
        "1;;;1;\n",
        "2;;;1;\n",
    )
    with open(self._expBatchLineFileName, "w") as BatchLineFileHandle:
        BatchLineFileHandle.writelines(lines)


def _writeExpBatchLineFileSeveralBatches(self):
    """Write the batch/line table (individuals 1 and 2 in batches 1 and 2) to ``self._expBatchLineFileName``."""
    lines = (
        "IndividualNumber;Pos5;Pos3;BatchNumber;Sequence\n",
        "1;;;1;\n",
        "2;;;1;\n",
        "1;;;2;\n",
        "2;;;2;\n",
    )
    with open(self._expBatchLineFileName, "w") as BatchLineFileHandle:
        BatchLineFileHandle.writelines(lines)


def _writeExpBatchLineFileSeveralBatches_different_lines_between_files(self):
    """Write the batch/line table (individuals 1-2 in batch 1, 3-4 in batch 2) to ``self._expBatchLineFileName``."""
    lines = (
        "IndividualNumber;Pos5;Pos3;BatchNumber;Sequence\n",
        "1;;;1;\n",
        "2;;;1;\n",
        "3;;;2;\n",
        "4;;;2;\n",
    )
    with open(self._expBatchLineFileName, "w") as BatchLineFileHandle:
        BatchLineFileHandle.writelines(lines)


def _writeInputFile(self):
    """Write the three-record multifasta (reference, Line1, Line2) to ``self._inFileName``.

    The last sequence is written without a trailing newline, as in the
    original fixture.
    """
    lines = (
        ">Sequence_de_Reference\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    with open(self._inFileName, "w") as inFileHandle:
        inFileHandle.writelines(lines)


def _writeInputFileWithSnpsAndIndels(self):
    """Write the gapped three-record multifasta (``-`` characters in all records) to ``self._inFileName``.

    The last sequence is written without a trailing newline, as in the
    original fixture.
    """
    lines = (
        ">Sequence_de_Reference\n",
        "C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    with open(self._inFileName, "w") as inFileHandle:
        inFileHandle.writelines(lines)
def _writeInputFileWithSeqErrorsInRefSeq(self):
    """Write a multifasta whose reference sequence contains the character ``7`` to ``self._inFileName``.

    The last sequence is written without a trailing newline, as in the
    original fixture.
    """
    lines = (
        ">Sequence_de_Reference\n",
        "CCTA7GCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    # The context manager closes the file even if a write fails.
    with open(self._inFileName, "w") as inFileHandle:
        inFileHandle.writelines(lines)


def _writeInputFileWithSeqErrorsInOneLineSeq(self):
    """Write a multifasta whose ``Line1`` sequence contains the character ``X`` to ``self._inFileName``.

    The last sequence is written without a trailing newline, as in the
    original fixture.
    """
    lines = (
        ">Sequence_de_Reference\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATXAAGGCAGTAGGCAAACCTCCACAATC\n",
        ">Line2\n",
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA",
    )
    with open(self._inFileName, "w") as inFileHandle:
        inFileHandle.writelines(lines)


def _writeInputFileWithASeveralLineSeq(self):
    """Write a multifasta whose ``Line2`` sequence is split over three text lines to ``self._inFileName``."""
    lines = (
        ">Sequence_de_Reference\n",
        "CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA\n",
        ">Line1\n",
        "CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATCCGCAGTAGCCAAACCTCCACAATA\n",
        ">Line2\n",
        # One record deliberately wrapped over three physical lines.
        "CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA\nCGCAGTAGCCAAA\nCCTCCACAATA\n",
    )
    with open(self._inFileName, "w") as inFileHandle:
        inFileHandle.writelines(lines)


def _writeInputFileWithUpcaseAndLowcaseNucleotide(self):
    """Write a multifasta mixing upper- and lower-case bases to ``self._inFileName``.

    Every record is split over three text lines.
    """
    lines = (
        ">Sequence_de_Reference\n",
        "CCTAAGCCATTGCTTGGtGATTATGAAGgCAGTAGTCAAACCTCCACAATC\nCGCAGTAGCCAAA\nCCTCCACAATA\n",
        ">Line1\n",
        "CCTTAGCCATTGCtTGGTGACTATGAAGGcAGTAGGCAAACCTCCACAATC\nCGCAGTAGCCAAA\nCCTCCACAATA\n",
        ">Line2\n",
        "CCTAAGCCAtTGCTTGGTGACTATCaAGGCAGTAGCCAAACCTCCACAATA\nCGCAGTAGCCAAA\nCCTCCACAATA\n",
    )
    with open(self._inFileName, "w") as inFileHandle:
        inFileHandle.writelines(lines)
_writeInputFileWith2SeqsWithTheSameName(self): inFileHandle = open(self._inFileName, "w") inFileHandle.write(">Sequence_de_Reference\n") inFileHandle.write("CCTAAGCCATTGCTTGGtGATTATGAAGgCAGTAGTCAAACCTCCACAATC\nCGCAGTAGCCAAA\nCCTCCACAATA\n") inFileHandle.write(">Line1\n") inFileHandle.write("CCTTAGCCATTGCtTGGTGACTATGAAGGcAGTAGGCAAACCTCCACAATC\n") inFileHandle.write(">Line2\n") inFileHandle.write("CCTAAGCCAtTGCTTGGTGACTATCaAGGCAGTAGCCAAACCTCCACAATA\n") inFileHandle.write(">Line2\n") inFileHandle.write("CCTAAGCCAtTGCTTGGTGACTATCaAGGCAGTAGCCAAACCTCCACAATA\n") inFileHandle.close() def _writeInputFileBatchWithPotentialDooblons(self): inFileHandle = open(self._inFileName, "w") inFileHandle.write(">AU247387ref\n") inFileHandle.write("CACTATAGCTCCTAACATTCCTGAAGTGAAGATCACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTACGTTCTGGTTACTCTTCAATTTGGGCATGCTTAATTATCTCCTCAATTTCAATTTGGCCATGCTTAATGTTGGGTGCTTTCTTTATAGCCTGCTCACCAACATGTGATCTGTTCTTTGTATGCTCAGGTGGTTGCATGGCTTCGTTCTCTTTAGCCTTCGCTGTTTGTGGCTTTGTTATGTGACCAAGCACTTGCTATACTGTCTATTTGTTCGCAGGTGATTGCAGGTCTGTGGATCCTCTGGGTTCTTTCTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTGTGCTGCAACAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCATACGAGTTGTGAACTGATGACATCCTCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTACGAGAAGTACGAGGACAAGATCGATGCTTTTGGAGAGAAG\n") inFileHandle.write(">10102\n") 
inFileHandle.write("NNNtatagctcctaacattcctgaagtgaagatcacrgaggacnnggctgtcaatgttgcccgctcgctgagatatgagatcaacaggggcttygctagcttgagggcgattggNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") inFileHandle.write(">10954\n") inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtcaatgttgcccgctcgctgagatatgagatcaacaggggctttgctagcctgagggcgattggtcaaggccgtgacctgaagaaattcctgattgtacgt---------------------------ttaat---------------------------------------------------------------------------------------------tggttgcatggcttcgttctctttagccttcgctgtttgtggctttgttatgtgaccaagcacttgctatactgtctatttgttcgcaggtgattgcaggtctgtggatcctct---------ctgcccttgggagctgctgcaatttcctcaccttgttctacataggtaatgtgcttcgctgctacagcctgaacttg--------cagatgtgcagtaactgtacctagcattgtttacccat------------------------tctcgctttcttacNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") inFileHandle.write(">ABERAVON\n") 
inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNggtcaaggccgtgacctgaagaaattcctgattgtacgt---------------------------ttaat---------------------------------------------------------------------------------------------tggttgcatggcttcgttctctttagccttcgctgtttgtggctttgttatgtgaccNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") inFileHandle.write(">CARILLON\n") inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNcaacattgcccgctcgctgagatatgagatcaacaggggcttctttactttgaaggagatcggtcagggccgtgatctgaagaaattcctcattgtatgttctggttactcttcaatttgggcatgcttaat---------------------------------gttgggtgctttctttat--cctgctcaccaacatgtgatctgttctttgtatgctcaggtggttgccgg---------------------------------------------------------------------------------------------------cctctgggttctttctgttcttgggagctcttgcaacttcttgacattggcatatataggtaat------------------tttaacttgtgctgcaacacttgagttcataaccaccctag------ttgtccatacgagttgtgaactgatgacatccgttctttttcccragtgcagtcttcgtggtgctctacacggtgccagttctgtatgaNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") inFileHandle.write(">CONCERTO\n") 
inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNctttgttatgtgaccaagcacttgctatactgtctatttgttcgcaggtgattgcaggtctgtggatcctct---------ctgcccttgggagctgctgcaatttcctcaccttgttctacataggtaatgtgcttcgctgctacagcctgaacttg--------cagatgtgcagtaactgtacctagcattgtttacccat------------------------tctcgctttcttacttgcagtcttcatggttctctacactgtgccNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") inFileHandle.write(">F14-13\n") inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtsaatgttgcccgctcgctgagatatgagatcaacaggggctttgctagcctgagggcgattggtcaaggccgtgacctgaagaaaNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") inFileHandle.write(">GAGNY\n") 
inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNcattgcccgctcgctgagatatgagatcaacaggggcttctttactttgaaggagatyggtcagggccgtgayctgaagaaattcctsattgtaygtNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") inFileHandle.write(">GREECE\n") inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtsaacattgcccgctcgctgagatatgagatcaacaggggcttctttactttgaaggagatyggycagggccgtgatctgaagaaattcctcattgtatgtNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") inFileHandle.write(">IMAGINE\n") 
inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtcaatgttgcccgctcgctgagatatgagatcaacaggggctttgctagcctgagggcgattggtcaaggccgtgacctgaagaaattcctgattgtacgt---------------------------ttaat---------------------------------------------------------------------------------------------tggttgcatggcttcgttctctttagccttcgctgtttgtggctttgttatgtgaccaagcacttgctatactgtctatttgttcgcaggtgattgcaggtctgtggatcctct---------ctgcccttgggagctgctgcaatttcctcaccttgttctacataggtaatgtgcttcgctgctacagcctgaacttg--------cagatgtgcagtaactgtacctagcattgtttacccat------------------------tctcgctttcttacttgcagtcttcatggttctctacactgtgccNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") inFileHandle.write(">IRELAND\n") inFileHandle.write("NNNTATAGCTCCTAACATTCCTGAAGTGACGATTCCAGAGGACACGATTGTGAACATTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTCTTTACTTTGATGGAGATTGGCCAGGGCCGTGATCTGAAGAAATTCCTCATTGTATGT---------------------------TTGTTTATCTCCTCAATTTCAATTTGGCCATGCTTAATGTTGGGTGCTTTCTGTATAGCCTGCTCACCAAGGTGTGATCTCTTCTTTGTATACACAGGTGGTTGCTGG---------------------------------------------------------------------------------------------------CCTCTGGGTTCTTTCTGTTCTTGGGAGCTCTTGCAACTTCTTGACNTTGGCATATATAGGTNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") inFileHandle.write(">NEMOF\n") 
inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtcaatgttgcccgctcgctgagatatgagatcaacaggggctttgctagcctgagggcgattggtcaaggccgtgacctgaagaaattcctgattgtacgt---------------------------ttaat---------------------------------------------------------------------------------------------tggttgcatggcttcgttctctttagccttcgctgtttgtggctttgttatgtgaccaagcacttgctatactgtctatttgttcgcaggtgattgcaggtctgtggatcctct---------ctgcccttgggNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") inFileHandle.write(">NEMOH\n") inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtcaatgttgcccgctcgctgagatatgagatcaacaggggctttgctagcctgagggcgattggtcaaggccgtgacctgaagaaattcctgattgtacgt---------------------------ttaat---------------------------------------------------------------------------------------------tggttgcatggcttcgttctctttagccttcgctgtttgtggctttgttatgtgaccaagcacttgctatactgtctatttgttcgcaggtgattgcaggtctgtggatcctct---------ctgcccttgggagctgctgcaatttcctcaccttgttctacataggtaatgtgcttcgctgctacagcctgaacttg--------cagatgtgcagtaactgtacctagcattgtttacccat------------------------tctcgctttcttacNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") inFileHandle.write(">POLAND\n") inFileHandle.write("NNNTATAGCTCCTAACATTCCTGAAGTGAAGATCACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTAYGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGTTGCATGGCTTCGTTCTCTTTAGCCTTCGCTGTTTGTGGCTTTGTTATGTGACCAAGCNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") 
inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") inFileHandle.write(">SPAIN\n") inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNtcaacattgcccgctcgctgagatatgagatcaacaggggcttctttactttgaaggagatcggtcagggccgtgatctgaagaaattcctcattgtatgttctggttactcttcaatttgggcatgcttaat---------------------------------gttgggtgctttctttat--cctgctcaccaacatgtgatctgttctttgtatgctcaggtggttgccgg---------------------------------------------------------------------------------------------------cctctgggttctttctgttcttgggagctcttgcaacttcttgacattggcatatataggtaat------------------tttaacttgtgctgcaacacttgagttcataaccaccctag------ttgtccatacgagttgtgaactgatgacatccgttctttttcccgagtgcagtcttcgtggtgctctacacggtgccagttctgtatgagaagtacgacgacaaggttgatgcttttggtgagaag\n") inFileHandle.write(">TRANSATE\n") inFileHandle.write("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNcgctcgctgagatatgagatcaacaggggcttctttactttgaaggagatYggccagggtcgcgacctcaagaaattcctcattgtatgttgcttgt-ctcttcaatttcaacatgcttgat---------------------------------gttgggtgctttctttat--cctgctcaccaacatgtgatctcttctttgtatgctcaggtggttgcggg---------------------------------------------------------------------------------------------------tctctgggttctttctgttcttgggagctcttgcaacttcttgacattggcatatataggtaaK------------------tataRcttgtgctgcaacacttgagttcataaccNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") inFileHandle.write(">VIGOR\n") 
inFileHandle.write("NNNTATAGCTCCTAACATTCCTGAAGTGAAGATCACGGAGGACCTGGCTGTCAATGTTGCCCGCTCGCTGAGATATGAGATCAACAGGGGCTTTGCTAGCCTGAGGGCGATTGGTCAAGGCCGTGACCTGAAGAAATTCCTGATTGTACGT---------------------------TTAAT---------------------------------------------------------------------------------------------TGGTTGCATGGCTTCGTTCTCTTTAGCCTTCGCTGTTTGTGGCTTTGTTATGTGACCAAGCACTTGCTATACTGTCTATTTGTTCGCAGGTGATTGCAGGTCTGTGGATCCTCT---------CTGCCCTTGGGAGCTGCTGCAATTTCCTCACCTTGTTCTACATAGGTAATGTGCTTCGCTGCTACAGCCTGAACTTG--------CAGATGTGCAGTAACTGTACCTAGCATTGTTTACCCAT------------------------TCTCGCTTTCTTACTTGCAGTCTTCATGGTTCTCTACACTGTGCCGGTTCTGTACGAGAAGTACGAGGACAAGATCGATGCTTTTGGAGAGAAG\n") inFileHandle.close() def _writeRealExpAlleleFile(self): expFileHandle = open(self._expAlleleFileName, "w") expFileHandle.write("AlleleNumber;Value;Motif;NbCopy;Comment\n") expFileHandle.write("1;G;;;\n") expFileHandle.write("2;T;;;\n") expFileHandle.write("3;A;;;\n") expFileHandle.write("4;C;;;\n") expFileHandle.write("5;-;;;\n") expFileHandle.close(); def _writeRealExpSequenceCSVFile(self): SequenceFSAFileHandle = open(self._expSequenceCSVFileName, "w") SequenceFSAFileHandle.write("SequenceName;SeqType;BankName;BankVersion;ACNumber;Locus;ScientificName\n") SequenceFSAFileHandle.write("PpHDZ31_ref;Reference;;;;;Pinus pinaster\n") SequenceFSAFileHandle.close() def _writeRealExpBatchFile(self): FileHandle = open(self._expBatchFileName, "w") FileHandle.write("BatchNumber: 1\n") FileHandle.write("BatchName: INRA_Pinus_pinaster_HDZ31-1\n") FileHandle.write("GeneName: PpHDZ31\n") FileHandle.write("Description: \n") FileHandle.write("ContactNumber: 1\n") FileHandle.write("ProtocolNumber: 1\n") FileHandle.write("ThematicNumber: 1\n") FileHandle.write("RefSeqName: PpHDZ31_ref\n") FileHandle.write("AlignmentFileName: \n") FileHandle.write("SeqName: \n") FileHandle.write("//\n") FileHandle.close() def _writeInputFileSeveralBatches(self): if(not FileUtils.isRessourceExists(self._inputDirSeveralBatches)): 
os.mkdir(self._inputDirSeveralBatches) inFileHandle = open(self._inputDirSeveralBatches+"/Gene1.fasta","w") inFileHandle.write(">Sequence_de_Reference1\n") inFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n") inFileHandle.write(">Line1\n") inFileHandle.write("CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n") inFileHandle.write(">Line2\n") inFileHandle.write("CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA") inFileHandle.close() inFileHandle2 = open(self._inputDirSeveralBatches+"/Gene2.fasta","w") inFileHandle2.write(">Sequence_de_Reference2\n") inFileHandle2.write("C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n") inFileHandle2.write(">Line1\n") inFileHandle2.write("C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n") inFileHandle2.write(">Line2\n") inFileHandle2.write("CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA") inFileHandle2.close() def _writeInputFileSeveralBatches_different_lines_between_files(self): if(not FileUtils.isRessourceExists(self._inputDirSeveralBatches)): os.mkdir(self._inputDirSeveralBatches) inFileHandle = open(self._inputDirSeveralBatches+"/Gene1.fasta","w") inFileHandle.write(">Sequence_de_Reference1\n") inFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n") inFileHandle.write(">Line1\n") inFileHandle.write("CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n") inFileHandle.write(">Line2\n") inFileHandle.write("CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA") inFileHandle.close() inFileHandle2 = open(self._inputDirSeveralBatches+"/Gene2.fasta","w") inFileHandle2.write(">Sequence_de_Reference2\n") inFileHandle2.write("C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n") inFileHandle2.write(">Line3\n") inFileHandle2.write("C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n") inFileHandle2.write(">Line4\n") inFileHandle2.write("CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA") inFileHandle2.close() def 
_writeInputFileSeveralBatches_different_lines_and_same_refseq_between_files(self): if(not FileUtils.isRessourceExists(self._inputDirSeveralBatches)): os.mkdir(self._inputDirSeveralBatches) inFileHandle = open(self._inputDirSeveralBatches+"/Gene1.fasta","w") inFileHandle.write(">Sequence_de_Reference1\n") inFileHandle.write("CCTAAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n") inFileHandle.write(">Line1\n") inFileHandle.write("CCTTAGCCATTGCTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n") inFileHandle.write(">Line2\n") inFileHandle.write("CCTAAGCCATTGCTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA") inFileHandle.close() inFileHandle2 = open(self._inputDirSeveralBatches+"/Gene2.fasta","w") inFileHandle2.write(">Sequence_de_Reference1\n") inFileHandle2.write("C--AAGCCATTGCTTGGTGATTATGAAGGCAGTAGTCAAACCTCCACAATC\n") inFileHandle2.write(">Line3\n") inFileHandle2.write("C--TAGCCA---CTTGGTGACTATGAAGGCAGTAGGCAAACCTCCACAATC\n") inFileHandle2.write(">Line4\n") inFileHandle2.write("CCTAAGCCATT-CTTGGTGACTATCAAGGCAGTAGCCAAACCTCCACAATA") inFileHandle2.close() if __name__ == "__main__": unittest.main()