annotate commons/core/parsing/Multifasta2SNPFile.py @ 68:85e80c21b1f7 draft

Uploaded
author m-zytnicki
date Mon, 16 Nov 2015 12:00:32 -0500
parents 769e306b7933
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1 import re
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
2 import os
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
3 import logging
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
4 from commons.core.utils.FileUtils import FileUtils
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
5 from commons.core.seq.BioseqDB import BioseqDB
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
6 from commons.core.seq.Bioseq import Bioseq
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
7 from commons.core.LoggerFactory import LoggerFactory
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
8
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
# Nucleotide alphabets; iterating a string yields one set member per character.
DNA_ALPHABET_WITH_N_AND_DELS = set("ATGCN-")
# Upper-case symbols plus the gap character '-', followed by the lower-case symbols.
IUPAC = set("ATGCURYMKWSBDHVN-" + "atgcurymkwsbdhvn")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
11
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
12 class Multifasta2SNPFile( object ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
13
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
14 POLYM_TYPE_4_SNP = "SNP"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
15 POLYM_TYPE_4_INSERTION = "INSERTION"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
16 POLYM_TYPE_4_DELETION = "DELETION"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
17 POLYM_DEFAULT_CONFIDENCE_VALUE = "A"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
18 SNP_LENGTH = 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
19 FLANK_LENGTH = 250
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
20
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def __init__(self, taxon, batchName="", geneName=""):
    """Initialise output file names, result accumulators and the log file.

    taxon is stored unchanged. batchName and geneName are stored only when
    they are truthy; otherwise the matching attribute is not created here.
    If a file with the default log name already exists it is removed
    before the logger is created.
    """
    self._taxon = taxon
    if batchName:
        self._batchName = batchName
    if geneName:
        self._geneName = geneName

    # Default names of the files this object writes or logs to.
    self._outSubSNPFileName = "SubSNP.csv"
    self._outAlleleFileName = "Allele.csv"
    self._outIndividualFileName = "Individual.csv"
    self._outSequenceFSAFileName = "Sequences.fsa"
    self._outSequenceCSVFileName = "Sequences.csv"
    self._outBatchFileName = "Batch.txt"
    self._outBatchLineFileName = "BatchLine.csv"
    self._logFileName = "multifasta2SNP.log"

    # Result accumulators, one per output.
    self._lBatchFileResults = []
    self._lSubSNPFileResults = []
    self._lRefSequences = []
    self._lIndividualFileResults = []
    self._lBatchLineFileResults = []
    self._dIndividualNumbers4SubSNPResults = {}
    self._dAlleleFileResults = {}

    # Working state used while scanning sequences for indels and SNPs.
    self.dcurrentIndel = {}
    self.lIndelsOfTheCurrentLine = []
    self.lIndelsOverAllLines = []
    self.dSNPsPositions = {}

    self._iCurrentLineNumber = 0
    self._currentBatchNumber = 1
    self.currentLineName = ""
    self.currentNucleotide = ""
    self.currentPosition = 0
    self._sPolymConfidenceValue = Multifasta2SNPFile.POLYM_DEFAULT_CONFIDENCE_VALUE
    self._sPolymType = Multifasta2SNPFile.POLYM_TYPE_4_SNP
    self._iPolymLength = Multifasta2SNPFile.SNP_LENGTH
    self._fileUtils = FileUtils()

    # Start from a fresh log file.
    logFileName = self._logFileName
    if self._fileUtils.isRessourceExists(logFileName):
        os.remove(logFileName)
    self._logFile = LoggerFactory.createLogger(logFileName, logging.INFO, "%(asctime)s %(levelname)s: %(message)s")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
66
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def runOneBatch(self, inFileName):
    """Process one input file, write all output files, then advance the batch number.

    inFileName is stored as the current file name and passed to
    createWrapperFromFile; the resulting wrapper is kept in self._wrapper
    and handed to detectSNPsAndIndels.
    """
    # TODO: remove this method; in the end only runOneBatchWithoutWriting should be used
    # Start from an empty indel list, exactly as runOneBatchWithoutWriting
    # does, so indels collected for a previous batch are not processed again
    # against this batch's sequences.
    self.lIndelsOverAllLines = []
    self._currentFileName = inFileName
    self._wrapper = self.createWrapperFromFile(inFileName)
    self._lBatchFileResults = self.completeBatchList()
    self.detectSNPsAndIndels(self._wrapper)
    # Outputs are written before the batch number is incremented.
    self._writeAllOutputFiles()
    self._currentBatchNumber += 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
75
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def runOneBatchWithoutWriting(self, inFileName):
    """Process one input file without writing any output file.

    Empties the list of indels over all lines, builds the wrapper for
    inFileName, refreshes the batch results, runs the SNP/indel detection
    on the wrapper and finally increments the current batch number.
    """
    self.lIndelsOverAllLines = []
    self._currentFileName = inFileName

    self._wrapper = self.createWrapperFromFile(inFileName)
    self._lBatchFileResults = self.completeBatchList()

    self.detectSNPsAndIndels(self._wrapper)
    self._currentBatchNumber = self._currentBatchNumber + 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
83
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
84
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _cleanOutputsInTheCurrentDir(self):
    """Remove output files left over from a previous run.

    Calls FileUtils.removeFilesByPattern with "*.csv", then deletes the
    batch file and the FSA sequence file when FileUtils reports that they
    exist.
    """
    # TODO: create a list of files to be deleted
    FileUtils.removeFilesByPattern("*.csv")
    for leftover in (self._outBatchFileName, self._outSequenceFSAFileName):
        if FileUtils.isRessourceExists(leftover):
            os.remove(leftover)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
92
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
93
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _createOutputObjectsIteratingOnCurrentDir(self):
    """Run one batch, without writing, for every FASTA file found under ".".

    Files whose extension is ".fasta" or ".fsa" are processed in sorted
    order. For each one the gene name is the file name without its
    extension and the batch name is "Batch_" followed by the gene name.
    """
    # TODO: handle multiple extensions
    extList = (".fasta", ".fsa")
    for dirname, dirnames, filenames in os.walk("."):
        # Sorting in place makes os.walk visit sub-directories in a
        # reproducible order, so batch numbers do not depend on the
        # file system's listing order.
        dirnames.sort()
        for filename in sorted(filenames):
            geneName, extension = os.path.splitext(filename)
            if extension not in extList:
                continue
            self._geneName = geneName
            self._batchName = "Batch_" + geneName
            # os.walk yields names relative to dirname; join them so files
            # in sub-directories are opened from the right place. normpath
            # keeps top-level files as a bare "name.ext", as before.
            filePath = os.path.normpath(os.path.join(dirname, filename))
            self.runOneBatchWithoutWriting(filePath)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
104
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def runSeveralBatches(self, inputDir):
    """Process every FASTA file of inputDir and write the output files there.

    The working directory is switched to inputDir while the previous
    outputs are cleaned, the batches are run and the outputs are written.
    The caller's working directory is restored afterwards, even if one of
    those steps raises.
    """
    # TODO: remove the chdir calls, use absolute file paths and update the tests accordingly
    previousDir = os.getcwd()
    os.chdir(inputDir)
    try:
        self._cleanOutputsInTheCurrentDir()
        self._createOutputObjectsIteratingOnCurrentDir()
        self._writeAllOutputFiles()
    finally:
        # Going back to the saved directory (rather than "../") is correct
        # for nested, absolute or "." input directories as well.
        os.chdir(previousDir)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
112
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
113
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _treatADeletionClosingWithAnotherBaseThanRefSeq(self, lineName, nucleotide, position):
    """Close any open indel, then record the nucleotide at position as a SNP.

    After the SNP is managed, position is registered through
    addOnePolymorphicPosition.
    """
    # NOTE(review): only the indel closing is taken to be conditional; the
    # two calls below are assumed to run in every case - confirm.
    indelIsOpen = self.isTheIndelOpen4ThisLine
    if indelIsOpen:
        self._closeTheCurrentIndel(lineName, nucleotide, position)

    self._manageSNPs(lineName, nucleotide, position)
    self.addOnePolymorphicPosition(position)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
119
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _treatNucleotideDifferentThanRefSeqCase(self, refSeq, lineName, index, nucleotide, position):
    """Dispatch a nucleotide that was detected as different from the reference.

    When either the nucleotide or refSeq[index] is the gap character "-",
    the current indel is expanded if one is open, otherwise a new indel is
    started. In every other case the nucleotide is handed to
    _treatADeletionClosingWithAnotherBaseThanRefSeq.
    """
    gapInvolved = nucleotide == "-" or refSeq[index] == "-"
    if not gapInvolved:
        self._treatADeletionClosingWithAnotherBaseThanRefSeq(lineName, nucleotide, position)
        return

    if self.isTheIndelOpen4ThisLine:
        self._expandTheCurrentIndel(position, nucleotide)
    else:
        self._startAnIndel(position, nucleotide)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
128
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
129
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _treatSameNucleotideInOneIndel(self, refSeq, lineName, index, nucleotide, position):
    """Handle a nucleotide not detected as a SNP while an indel is open.

    An open deletion is closed. An open insertion is expanded while
    refSeq[index] is still the gap character "-", and closed otherwise.
    Any other polymorphism type leaves the state untouched.
    """
    # NOTE(review): the final "else" is taken to belong to the test on
    # refSeq[index], not to the outer if/elif chain - confirm.
    currentType = self._sPolymType
    if currentType == Multifasta2SNPFile.POLYM_TYPE_4_INSERTION:
        if refSeq[index] == "-":
            self._expandTheCurrentIndel(position, nucleotide)
        else:
            self._closeTheCurrentIndel(lineName, nucleotide, position)
    elif currentType == Multifasta2SNPFile.POLYM_TYPE_4_DELETION:
        self._closeTheCurrentIndel(lineName, nucleotide, position)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
138
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def detectSNPsAndIndels(self, iRefAndLines):
    """Scan every line sequence against the reference for SNPs and indels.

    iRefAndLines must provide getReferenceSequence() and
    getLinesBioseqInstances(); each yielded instance is read through its
    "sequence" and "header" attributes. Positions are 1-based. Nucleotides
    located beyond the end of the reference are not compared.
    """
    refSeq = iRefAndLines.getReferenceSequence()
    refSeqLength = len(refSeq)
    self.dSNPsPositions = {}

    for iLineBioseq in iRefAndLines.getLinesBioseqInstances():
        lineSequence = iLineBioseq.sequence
        self.currentLineName = iLineBioseq.header
        self._manageCurrentIndividual(self.currentLineName)

        # Each line starts with no open indel and no recorded indel.
        self.isTheIndelOpen4ThisLine = 0
        self.lIndelsOfTheCurrentLine = []
        for index, nucleotide in enumerate(lineSequence):
            position = index + 1
            if index < refSeqLength:
                if self._isSNPDetected(refSeq, index, nucleotide):
                    self._treatNucleotideDifferentThanRefSeqCase(refSeq, self.currentLineName, index, nucleotide, position)
                elif self.isTheIndelOpen4ThisLine:
                    self._treatSameNucleotideInOneIndel(refSeq, self.currentLineName, index, nucleotide, position)
            # Remember the last nucleotide and position seen for the post-processing.
            self.currentNucleotide = nucleotide
            self.currentPosition = position

        self.lIndelsOverAllLines = self.lIndelsOverAllLines + self.lIndelsOfTheCurrentLine

    # NOTE(review): the post-processing is assumed to run once, after all
    # lines have been scanned, and not once per line - confirm.
    self._postTraitementDetectSNP(self.currentLineName, self.currentNucleotide, self.currentPosition)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
165
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _manageCurrentIndividual(self, lineName):
    """Register lineName in the individual and batch-line results.

    The first time a line name is seen, it is mapped to the length the
    individual list has at that moment.
    """
    self._lIndividualFileResults = self._completeIndividualListWithCurrentIndividual(
        self._lIndividualFileResults, lineName)
    self._lBatchLineFileResults = self._completeBatchLineListWithCurrentIndividual(
        self._lBatchLineFileResults, self._lIndividualFileResults, lineName)

    if lineName not in self._dIndividualNumbers4SubSNPResults:
        self._dIndividualNumbers4SubSNPResults[lineName] = len(self._lIndividualFileResults)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
171
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
172
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _manageLastPositionIndels(self, lineName, nucleotide, position):
    """Close an indel that is still open and move it to the overall indel list.

    Does nothing when no indel is open.
    """
    # NOTE(review): the pop/append is taken to happen only when an indel
    # was open (and therefore just closed) - confirm.
    if not self.isTheIndelOpen4ThisLine:
        return

    self._closeTheCurrentIndel(lineName, nucleotide, position)
    # The indel just closed is the last entry of the current line's list.
    closedIndel = self.lIndelsOfTheCurrentLine.pop()
    self.lIndelsOverAllLines.append(closedIndel)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
177
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _postTraitementDetectSNP(self, lineName, nucleotide, position):
    """Finish the detection: pending indel, merges, missing entries, sorting.

    The steps run in this order: handle an indel left open at the last
    position, merge alleles and sub-SNPs from overlapping indels, add the
    missing alleles and sub-SNPs, then replace the sub-SNP results with
    their sorted version.
    """
    self._manageLastPositionIndels(lineName, nucleotide, position)

    self._mergeAllelesAndSubSNPsFromOverlappingIndels()
    self._addMissingsAllelesAndSubSNPs()

    sortedSubSNPs = self._sortSubSNPResultByBatchPositionAndLineName(self._lSubSNPFileResults)
    self._lSubSNPFileResults = sortedSubSNPs
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
185
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _manageSNPs(self, lineName, nucleotide, position):
    """Record a SNP of lineName at position, unless it is already recorded.

    The nucleotide is added to the allele results, the position is
    converted with getUngappedPositionInRefSeq, and a sub-SNP dictionary is
    appended to the sub-SNP results when subSNPExistsInSubSNPList does not
    find it there. The current polymorphism type is set to SNP.
    """
    snpType = Multifasta2SNPFile.POLYM_TYPE_4_SNP
    snpLength = Multifasta2SNPFile.SNP_LENGTH

    self._dAlleleFileResults = self._completeAlleleSetWithCurrentAllele(self._dAlleleFileResults, nucleotide)
    truePosition = self.getUngappedPositionInRefSeq(position)
    subSNPName = self._formatSubSNPName(lineName, truePosition, snpType)
    iAlleleNumber = self._dAlleleFileResults[nucleotide]
    self._sPolymType = snpType
    # Flanks are taken at the gapped position, not at truePosition.
    flank5Prime, flank3Prime = self.getFlanksOfASubSNP(lineName, position, snpLength, Multifasta2SNPFile.FLANK_LENGTH)

    dSubSNPResult = {}
    dSubSNPResult['subSNPName'] = subSNPName
    dSubSNPResult['position'] = truePosition
    dSubSNPResult['lineName'] = self._dIndividualNumbers4SubSNPResults[lineName]
    dSubSNPResult['allele'] = iAlleleNumber
    dSubSNPResult['batchNumber'] = self._currentBatchNumber
    dSubSNPResult['confidenceValue'] = self._sPolymConfidenceValue
    dSubSNPResult['type'] = self._sPolymType
    dSubSNPResult['length'] = snpLength
    dSubSNPResult['5flank'] = flank5Prime
    dSubSNPResult['3flank'] = flank3Prime

    if self.subSNPExistsInSubSNPList(dSubSNPResult, self._lSubSNPFileResults):
        return
    self._lSubSNPFileResults.append(dSubSNPResult)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
199
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _startAnIndel(self, position, nucleotide):
    """Open a new indel that starts and, for now, ends at position.

    The indel allele starts as the given nucleotide. The polymorphism type
    becomes DELETION when the nucleotide is the gap character "-", and
    INSERTION otherwise. The indel is then flagged as open.
    """
    self.dcurrentIndel['start'] = self.dcurrentIndel['end'] = position
    self.sCurrentIndelAllele = nucleotide

    isDeletion = (nucleotide == "-")
    self._sPolymType = (Multifasta2SNPFile.POLYM_TYPE_4_DELETION if isDeletion
                        else Multifasta2SNPFile.POLYM_TYPE_4_INSERTION)
    self.isTheIndelOpen4ThisLine = 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
209
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _expandTheCurrentIndel(self, position, nucleotide):
    """Append nucleotide to the current indel allele and move its end to position."""
    self.sCurrentIndelAllele += nucleotide
    self.dcurrentIndel.update(end=position)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
213
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _closeTheCurrentIndel(self, lineName, nucleotide, position):
    """Store the currently open indel for the current line and reset the indel state.

    The indel is described by a dictionary with the keys 'name',
    'lineName', 'start', 'end', 'allele', 'type' and 'length'. The nucleotide
    and position arguments are not used by this method.
    """
    indelStart = self.dcurrentIndel['start']
    subSNPName = self._formatSubSNPName(lineName, indelStart, self._sPolymType)

    closedIndel = {
        'name': subSNPName,
        'lineName': lineName,
        'start': indelStart,
        'end': self.dcurrentIndel['end'],
        'allele': self.sCurrentIndelAllele,
        'type': self._sPolymType,
        # Provisional value, replaced just below by the computed length.
        'length': self._iPolymLength,
    }
    closedIndel['length'] = self.getAnIndelLength(closedIndel)
    self.lIndelsOfTheCurrentLine.append(closedIndel)

    # No indel is open any more.
    self.dcurrentIndel.clear()
    self.isTheIndelOpen4ThisLine = 0
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
226
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
227
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _mergeAllelesAndSubSNPsFromOverlappingIndels(self):
    """Cluster the indels of all lines and add the resulting sub-SNPs.

    clusteriseIndels is called once per indel; the list returned by the
    last call is the one processed. For each of its indels the allele is
    refreshed with updateAllele and a sub-SNP is created, which is appended
    to the sub-SNP results unless it is already there.
    """
    # NOTE(review): the second loop is assumed to run after the first one,
    # not inside it - confirm.
    lIndelList = []
    for dIndel in self.lIndelsOverAllLines:
        lIndelList = self.clusteriseIndels(dIndel, self.lIndelsOverAllLines)

    for clusteredIndel in lIndelList:
        previousAllele = clusteredIndel['allele']
        indelStart = clusteredIndel['start']
        indelEnd = clusteredIndel['end']
        lineName = clusteredIndel['lineName']

        lineBioseq = self._wrapper._iLinesBioseqDB.fetch(lineName)
        updatedIndel = self.updateAllele(previousAllele, indelStart, indelEnd, lineBioseq, clusteredIndel)
        dSubSNPResult = self.createSubSNPFromAMissingPolym(updatedIndel, lineName)
        if self.subSNPExistsInSubSNPList(dSubSNPResult, self._lSubSNPFileResults):
            continue
        self._lSubSNPFileResults.append(dSubSNPResult)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
244
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def updateAllele(self, oldAllele, start, stop, LineBioSeq, dIndel):
    """Refresh the allele of dIndel from the line sequence between start and stop.

    The new allele is LineBioSeq.subseq(start, stop).sequence. It replaces
    dIndel['allele'] when it differs from oldAllele, and it is added to the
    allele results. Returns dIndel (the same dictionary object).
    """
    # TODO: write the test
    newAllele = LineBioSeq.subseq(start, stop).sequence
    alleleChanged = newAllele != oldAllele
    if alleleChanged:
        dIndel['allele'] = newAllele
    # NOTE(review): the registration below is assumed to happen whether or
    # not the allele changed - confirm.
    self._dAlleleFileResults = self._completeAlleleSetWithCurrentAllele(self._dAlleleFileResults, newAllele)
    return dIndel
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
252
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
253
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getFlanksOfASubSNP(self, lineName, subsnpPosition, polymLength, flankLength):
    """Return the (5' flank, 3' flank) pair of a sub-SNP on the given line.

    The line is fetched by name from the wrapper's lines database; the
    flanks come from its get5PrimeFlank and get3PrimeFlank methods. Only
    the 3' flank computation receives polymLength.
    """
    lineBioseq = self._wrapper._iLinesBioseqDB.fetch(lineName)
    return (
        lineBioseq.get5PrimeFlank(subsnpPosition, flankLength),
        lineBioseq.get3PrimeFlank(subsnpPosition, flankLength, polymLength),
    )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
260
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def createSubSNPFromAMissingPolym(self, dIndel, lineName):
    """Build and return the sub-SNP dictionary that describes an indel.

    dIndel is read through its 'type', 'start', 'lineName' and 'allele'
    keys. For an insertion the position converted to ungapped coordinates
    is the one just before the indel start; for any other type it is the
    indel start itself. The allele must already be present in the allele
    results.
    """
    gappedStart = dIndel['start']
    if dIndel['type'] == Multifasta2SNPFile.POLYM_TYPE_4_INSERTION:
        gappedStart = gappedStart - 1
    start = self.getUngappedPositionInRefSeq(gappedStart)

    # The name uses the line name stored in the indel, while the flanks and
    # the individual number below use the lineName argument.
    subSNPName = self._formatSubSNPName(dIndel['lineName'], start, dIndel['type'])
    iAlleleNumber = self._dAlleleFileResults[dIndel['allele']]
    iPolymLength = self.getAnIndelLength(dIndel)
    # Flanks are taken at the gapped start of the indel.
    flank5Prime, flank3Prime = self.getFlanksOfASubSNP(lineName, dIndel['start'], iPolymLength, Multifasta2SNPFile.FLANK_LENGTH)

    dSubSNPResult = {}
    dSubSNPResult['subSNPName'] = subSNPName
    dSubSNPResult['position'] = start
    dSubSNPResult['lineName'] = self._dIndividualNumbers4SubSNPResults[lineName]
    dSubSNPResult['allele'] = iAlleleNumber
    dSubSNPResult['batchNumber'] = self._currentBatchNumber
    dSubSNPResult['confidenceValue'] = self._sPolymConfidenceValue
    dSubSNPResult['type'] = dIndel['type']
    dSubSNPResult['length'] = iPolymLength
    dSubSNPResult['5flank'] = flank5Prime
    dSubSNPResult['3flank'] = flank3Prime
    return dSubSNPResult
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
280
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def clusteriseIndels(self, dIndel, lIndelsOverAllLines):
    """
    Compare every indel of the list with every indel of the list and
    propagate merged bounds to overlapping ones.

    Each pair is handed to mergeBoundsForTwoOverlappingIndels, then both
    members are written back through updateBoundsForAnIndelInAnIndelList.

    @param dIndel: unused; kept only so that existing callers keep working
    @param lIndelsOverAllLines: list of indel dictionaries
    @return: the list as returned by the last update call (the input list
             itself when it is empty)
    """
    # The loop variables get their own names so they no longer shadow the
    # (ignored) dIndel parameter; the former index counters were never read.
    for dCurrent in lIndelsOverAllLines:
        for dOther in lIndelsOverAllLines:
            dCurrent, dOther = self.mergeBoundsForTwoOverlappingIndels(dCurrent, dOther)
            lIndelsOverAllLines = self.updateBoundsForAnIndelInAnIndelList(lIndelsOverAllLines, dCurrent)
            lIndelsOverAllLines = self.updateBoundsForAnIndelInAnIndelList(lIndelsOverAllLines, dOther)

    return lIndelsOverAllLines
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
293
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def mergeBoundsForTwoOverlappingIndels(self, dIndel1, dIndel2):
    """
    Give two indels the same bounds when one starts inside the other.

    The indels overlap when the start of either one lies between the start
    and the end (both included) of the other. In that case both dictionaries
    are modified in place: 'start' becomes the smaller start and 'end' the
    larger end. Otherwise nothing is changed.

    @param dIndel1: dictionary with 'start' and 'end' keys
    @param dIndel2: dictionary with 'start' and 'end' keys
    @return: tuple (dIndel1, dIndel2), the same objects that were passed in
    """
    start1 = dIndel1['start']
    start2 = dIndel2['start']

    startsInside = (start2 <= start1 <= dIndel2['end']) or (start1 <= start2 <= dIndel1['end'])
    if not startsInside:
        return dIndel1, dIndel2

    mergedStart = min(start1, start2)
    mergedEnd = max(dIndel1['end'], dIndel2['end'])
    for dIndel in (dIndel1, dIndel2):
        dIndel['start'] = mergedStart
        dIndel['end'] = mergedEnd

    return dIndel1, dIndel2
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
313
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def updateBoundsForAnIndelInAnIndelList(self, lIndelsList, dIndelWithNewBounds):
    """
    Replace, in the list, the indel bearing the same name as the given one.

    The entry is located with findAnIndelInAListWithHisName, removed, and
    the given dictionary is inserted at the index that was found.

    @param lIndelsList: list of indel dictionaries, modified in place
    @param dIndelWithNewBounds: dictionary with a 'name' key
    @return: the modified list (same object)
    """
    dPrevious, iPosition = self.findAnIndelInAListWithHisName(dIndelWithNewBounds['name'], lIndelsList)
    lIndelsList.remove(dPrevious)
    lIndelsList.insert(iPosition, dIndelWithNewBounds)
    return lIndelsList
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
320
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
321
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def findASubSNPInAListWithHisName(self, name, lSubSNPList):
    """
    Look up a SubSNP dictionary by its 'subSNPName' value.

    @param name: SubSNP name to look for
    @param lSubSNPList: list of dictionaries holding a 'subSNPName' key
    @return: tuple (matching dictionary, its index in the list)
    @raise Exception: when no entry has that name (the message is also sent
                      to self._logFile.error)
    """
    dFound = {}
    iFound = -1
    for iCurrent, dCandidate in enumerate(lSubSNPList):
        if dCandidate['subSNPName'] == name:
            # No break: when several entries share the name, the last wins.
            dFound, iFound = dCandidate, iCurrent

    if dFound != {} and iFound != -1:
        return dFound, iFound

    msg = "trying to find a SubSNP not existing: " + name
    self._logFile.error(msg)
    raise Exception(msg)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
338
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def subSNPExistsInSubSNPList(self, dSubSNP2Find, lSubSNPList):
    """
    Tell whether the list already holds a SubSNP with the same name.

    Only the 'subSNPName' values are compared. The scan stops at the first
    match instead of walking the whole list while carrying an integer flag.

    @param dSubSNP2Find: dictionary with a 'subSNPName' key
    @param lSubSNPList: list of dictionaries with a 'subSNPName' key
    @return: True when a same-named entry is present, False otherwise
    """
    wantedName = dSubSNP2Find['subSNPName']
    return any(dSubSNP['subSNPName'] == wantedName for dSubSNP in lSubSNPList)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
349
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
350
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def findAnIndelInAListWithHisName(self, name, lIndelList):
    """
    Look up an indel dictionary by its 'name' value.

    @param name: indel name to look for
    @param lIndelList: list of dictionaries holding a 'name' key
    @return: tuple (matching dictionary, its index in the list)
    @raise Exception: when no entry has that name (the message is also sent
                      to self._logFile.error)
    """
    dFound = {}
    iFound = -1
    for iCurrent, dCandidate in enumerate(lIndelList):
        if dCandidate['name'] == name:
            # No break: when several entries share the name, the last wins.
            dFound, iFound = dCandidate, iCurrent

    if dFound != {} and iFound != -1:
        return dFound, iFound

    msg = "trying to find an indel not existing: " + name
    self._logFile.error(msg)
    raise Exception(msg)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
367
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _addMissingsAllelesAndSubSNPs(self):
    """
    Run addMissingAllelesAndSubSNPsForOnePolym on every known polymorphism.

    Indels of self.lIndelsOverAllLines are processed first, with their own
    'start', 'end' and 'type'; then every key of self.dSNPsPositions is
    processed as a one-position polymorphism of type "SNP".
    """
    for dIndel in self.lIndelsOverAllLines:
        self.addMissingAllelesAndSubSNPsForOnePolym(dIndel['start'], dIndel['end'], dIndel['type'])

    for snpPosition in self.dSNPsPositions:
        self.addMissingAllelesAndSubSNPsForOnePolym(snpPosition, snpPosition, "SNP")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
377
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def addMissingAllelesAndSubSNPsForOnePolym(self, start, end, polymType):
    """
    Record a SubSNP for every line carrying the reference allele on a range.

    For each line whose subsequence on [start, end] equals the reference
    subsequence, the allele is added to self._dAlleleFileResults and a
    SubSNP record is built; the record is appended to
    self._lSubSNPFileResults unless one with the same name is already there.

    @param start: first position handed to subseq
    @param end: last position handed to subseq
    @param polymType: value stored under the 'type' key of the polymorphism
    """
    referenceAllele = self._wrapper._iReferenceBioseq.subseq(start, end).sequence
    linesBioseqs = self._wrapper.getLinesBioseqInstances()

    for dLineAllele in self.getAllelesOfASubSeq(linesBioseqs, start, end):
        # Only lines identical to the reference on this range are handled.
        if not (dLineAllele['allele'] == referenceAllele):
            continue

        lineName = dLineAllele['header']
        dMissingPolym = {'lineName': lineName, 'start': start, 'end': end,
                         'allele': dLineAllele['allele'], 'type': polymType}
        self._dAlleleFileResults = self._completeAlleleSetWithCurrentAllele(
            self._dAlleleFileResults, dLineAllele['allele'])
        dSubSNPResult = self.createSubSNPFromAMissingPolym(dMissingPolym, lineName)
        if self.subSNPExistsInSubSNPList(dSubSNPResult, self._lSubSNPFileResults):
            continue
        self._lSubSNPFileResults.append(dSubSNPResult)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
391
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def addOnePolymorphicPosition(self, position):
    """
    Register a position in self.dSNPsPositions if it is not there yet.

    A new position is stored with the value 1; an already known position
    keeps its current value.

    @param position: key to register
    """
    # 'in' replaces dict.has_key(), which does not exist in Python 3.
    if position not in self.dSNPsPositions:
        self.dSNPsPositions[position] = 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
395
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getUngappedPositionInRefSeq(self, position):
    """
    Convert a position on the gapped reference into an ungapped position.

    The number of "-" characters found in the reference subsequence going
    from 1 to position - 1 is subtracted from the position. Position 1 is
    returned unchanged without reading the reference.

    @param position: position on the gapped reference sequence
    @return: position minus the number of gaps located before it
    """
    gapCount = 0
    if position != 1:
        upstreamSequence = self._wrapper._iReferenceBioseq.subseq(1, position - 1).sequence
        gapCount = upstreamSequence.count("-")

    return position - gapCount
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
404
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getAllelesOfASubSeq(self, BioSeqDb, start, end):
    """
    Extract, for every sequence of the collection, its allele on a range.

    @param BioSeqDb: iterable of objects exposing 'header' and 'subseq'
    @param start: first position handed to subseq
    @param end: last position handed to subseq
    @return: list of dictionaries {'header': ..., 'allele': ...}, one per
             sequence, in iteration order
    """
    return [{'header': iBioSeq.header,
             'allele': iBioSeq.subseq(start, end).sequence}
            for iBioSeq in BioSeqDb]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
414
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getAnIndelLength(self, dIndel):
    """
    Compute the length of a polymorphism.

    For a deletion the length is end - start + 1; for any other type it is
    the length of the value stored under 'allele'.

    @param dIndel: dictionary with a 'type' key, plus 'start' and 'end' for
                   a deletion or 'allele' otherwise
    @return: the length
    """
    if dIndel['type'] == Multifasta2SNPFile.POLYM_TYPE_4_DELETION:
        return dIndel['end'] - dIndel['start'] + 1
    return len(dIndel['allele'])
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
423
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def createWrapperFromFile(self, inFileName):
    """
    Parse one input file and wrap its reference sequence and its lines.

    The first parsed sequence is taken as the reference, the remaining ones
    as the lines. The reference is appended to self._lRefSequences once the
    checks have passed.

    @param inFileName: path of the input file
    @return: a ReferenceBioseqAndLinesBioseqDBWrapper built from the
             reference, the lines, the log file and the file name
    @raise Exception: "Problem with one input file: ..." when the file is
                      empty or its reference header was already seen
    """
    # 'with' releases the handle even when the parsing step raises.
    with open(inFileName, "r") as faF:
        iBioSeqDB = self._extractSequencesFromInputFile(faF)

    iBioSeqDB.upCase()
    referenceBioseq = iBioSeqDB[0]
    linesBioSeqDB = iBioSeqDB.extractPart(1, iBioSeqDB.getSize() - 1)

    # NOTE(review): the emptiness check runs after the file has been parsed
    # and indexed - confirm whether it can still be reached for an empty file.
    try:
        if FileUtils.isEmpty(inFileName):
            msg = "The input file is empty!"
            self._logFile.error(self._prefixeWithLineNumber(msg))
            raise Exception(self._prefixeWithFileName(msg))
        if self.isHeaderInRefSeqList(referenceBioseq.header):
            msg = "This reference sequence already exists in one previous file!"
            self._logFile.error(self._prefixeWithLineNumber(msg))
            raise Exception(self._prefixeWithLineNumber(msg))
    except Exception as e:
        # 'as' form: accepted by Python 2.6+ and Python 3.
        raise Exception("Problem with one input file: \n" + str(e))

    self._lRefSequences.append(referenceBioseq)

    return ReferenceBioseqAndLinesBioseqDBWrapper(referenceBioseq, linesBioSeqDB, self._logFile, inFileName)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
448
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def isHeaderInRefSeqList(self, header):
    """
    Tell whether a reference sequence with this header is already recorded.

    The scan over self._lRefSequences stops at the first match instead of
    always walking the whole list.

    @param header: header to look for
    @return: True when an item of self._lRefSequences has that header,
             False otherwise
    """
    return any(item.header == header for item in self._lRefSequences)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
455
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def completeBatchList(self):
    """
    Append the description of the current batch to the batch results.

    The contact, protocol and thematic numbers are always the string "1".

    @return: self._lBatchFileResults, with the new record appended
    """
    dBatch = {}
    dBatch['BatchNumber'] = self._currentBatchNumber
    dBatch['BatchName'] = self._batchName
    dBatch['GeneName'] = self._geneName
    dBatch['ContactNumber'] = "1"
    dBatch['ProtocolNumber'] = "1"
    dBatch['ThematicNumber'] = "1"
    dBatch['RefSeqName'] = self._wrapper._iReferenceBioseq.header

    self._lBatchFileResults.append(dBatch)
    return self._lBatchFileResults
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
463
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getLineAsAHeader(self, lineToBeCheck, lineNumber = 0):
    """
    Turn a raw header line into a validated sequence name.

    The line must begin with the '>' tag. The tag and the end-of-line
    characters are removed, repeated blanks are collapsed and blanks are
    replaced by underscores (both through helper methods), then the result
    is validated with checkHeaderAlphabet.

    @param lineToBeCheck: raw line read from the input file
    @param lineNumber: unused; kept so that existing callers keep working
    @return: the cleaned header
    @raise Exception: when the '>' tag is missing (an empty line included)
                      or when the cleaned header fails checkHeaderAlphabet
    """
    # startswith() also covers the empty line, on which indexing [0] used to
    # fail with an IndexError instead of the intended message.
    if not lineToBeCheck.startswith(">"):
        msg = "tag '>' omitted before header"
        self._logFile.error(self._prefixeWithLineNumber(msg))
        raise Exception(self._prefixeWithLineNumber(msg))

    obsHeader = lineToBeCheck[1:]
    obsHeader = obsHeader.replace("\n", "")
    # "\r" is dropped as well, as getLineAsASeq does, so that lines ending
    # with "\r\n" are not rejected by the alphabet check.
    obsHeader = obsHeader.replace("\r", "")
    obsHeader = self._removeRepeatedBlanksInAStr(obsHeader)
    obsHeader = self._replaceBlankByUnderScoreInAStr(obsHeader)
    if self.checkHeaderAlphabet(obsHeader):
        return obsHeader

    msg = "fatal error on header"
    self._logFile.error(self._prefixeWithLineNumber(msg))
    raise Exception(self._prefixeWithLineNumber(msg))
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
484
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def getLineAsASeq(self, lineToBeCheck):
    """
    Turn a raw sequence line into an upper-case sequence string.

    The line is upper-cased and its "\\n" and "\\r" characters are removed.
    The validation with isIUPAC_bases is done on a copy from which the "-"
    characters have been removed; the returned string keeps them.

    @param lineToBeCheck: raw line read from the input file
    @return: the cleaned sequence
    @raise Exception: when isIUPAC_bases rejects the line
    """
    obsSeq = str.upper(lineToBeCheck).replace("\n", "").replace("\r", "")
    if self.isIUPAC_bases(obsSeq.replace("-", "")):
        return obsSeq

    msg = "the sequence contain a non nucleic character "
    self._logFile.error(self._prefixeWithLineNumber(msg))
    raise Exception(self._prefixeWithLineNumber(msg))
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
500
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def checkHeaderAlphabet( self, strToCheck):
    """
    Check that a header only uses the allowed alphabet.

    Allowed characters are letters, digits, underscore, colon and hyphen.
    An empty string is rejected.

    @param strToCheck: header to validate
    @return: True when the string is not empty and holds no character
             outside the allowed alphabet, False otherwise
    """
    if strToCheck == "":
        return False
    # Raw string: the "\-" escape in a normal string literal is an invalid
    # escape sequence and triggers a warning on recent Python versions.
    forbiddenCharacter = re.compile(r'[^a-zA-Z0-9_:\-]', re.IGNORECASE)
    # One forbidden character is enough to reject; no need to collect all.
    return forbiddenCharacter.search(strToCheck) is None
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
515
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
## Check that a string only holds characters of DNA_ALPHABET_WITH_N_AND_DELS.
# @param sequence string to check
# @return True if every character belongs to DNA_ALPHABET_WITH_N_AND_DELS, False otherwise (an empty string included)
#
def isDNA_bases( self, sequence):
    if sequence == "" :
        return False

    return set(sequence).issubset(DNA_ALPHABET_WITH_N_AND_DELS)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
529
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
## Check that a string only holds characters of the IUPAC set.
# @param sequence string to check
# @return True if every character belongs to IUPAC, False otherwise (an empty string included)
#
def isIUPAC_bases( self, sequence):
    if sequence == "" :
        return False

    return set(sequence).issubset(IUPAC)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
543
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _writeAllOutputFiles(self):
    """
    Hand this object to a new Multifasta2SNPFileWriter for writing.
    """
    Multifasta2SNPFileWriter().write(self)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
547
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _sortSubSNPResultByBatchPositionAndLineName(self, lSubSNPResults):
    """
    Return the SubSNP records sorted by batch number, position, line name.

    @param lSubSNPResults: iterable of dictionaries holding the keys
                           'batchNumber', 'position' and 'lineName'
    @return: a new sorted list; the input is left untouched
    """
    def sortKey(dSubSNP):
        return (dSubSNP['batchNumber'], dSubSNP['position'], dSubSNP['lineName'])

    lSorted = list(lSubSNPResults)
    lSorted.sort(key=sortKey)
    return lSorted
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
550
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _formatSubSNPName(self, currentLineHeader, position, polymType):
    """
    Build a SubSNP name from the batch name, the type, the position and the line.

    The four parts are joined with underscores; only the first three
    characters of the polymorphism type are kept.

    @param currentLineHeader: header of the line, used as last part
    @param position: position, converted with str()
    @param polymType: polymorphism type, truncated to three characters
    @return: the formatted name
    """
    nameParts = [self._batchName, polymType[:3], str(position), currentLineHeader]
    return "_".join(nameParts)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
554
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
555 def _isSNPDetected(self, referenceSequence, index, nt):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
556 if((nt != referenceSequence[index]) and (nt.upper() != "N") and (referenceSequence[index].upper() != "N")):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
557 return True
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
558 else:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
559 return False
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
560
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _extractSequencesFromInputFile(self, inFile):
    """Read a multifasta file object and return its records as a BioseqDB.

    All lines are read at once. The line at the current index is turned into a
    header with getLineAsAHeader; the following lines that do not start with
    ">" are converted with getLineAsASeq and concatenated into the sequence.
    self._iCurrentLineNumber holds the 0-based index of the line being
    processed and is updated before each line is converted.
    """
    lLines = inFile.readlines()
    lastIndex = len(lLines) - 1
    # first pass over the file
    self._iCurrentLineNumber = 0
    bioseqDB = BioseqDB()
    # NOTE(review): the outer loop stops once the last index is reached, so a
    # header sitting on the very last line does not produce a record.
    while self._iCurrentLineNumber < lastIndex:
        bioseq = Bioseq()
        bioseq.header = self.getLineAsAHeader(lLines[self._iCurrentLineNumber])
        lSeqParts = []
        while self._iCurrentLineNumber < lastIndex:
            self._iCurrentLineNumber += 1
            currentLine = lLines[self._iCurrentLineNumber]
            if currentLine[0] == ">":
                # next record starts here; the outer loop picks it up as a header
                break
            lSeqParts.append(self.getLineAsASeq(currentLine))
        bioseq.setSequence("".join(lSeqParts))
        bioseqDB.add(bioseq)
    return bioseqDB
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
585
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
586 def _removeRepeatedBlanksInAStr (self, StrToClean ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
587 resStr=StrToClean.expandtabs(2)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
588 compResStr=resStr.replace (" "," ")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
589 while compResStr != resStr :
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
590 resStr=compResStr
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
591 compResStr=resStr.replace (" "," ")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
592 return resStr
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
593
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
594 def _replaceBlankByUnderScoreInAStr (self, StrToClean ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
595 resStr = StrToClean.replace (" ","_")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
596 return resStr
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
597
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
598 def _prefixeWithLineNumber (self, strMsg):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
599 resStr = "File: " + self._currentFileName + "\t"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
600 resStr += "Line %i " % (self._iCurrentLineNumber+1 ) + strMsg
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
601 return resStr
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
602
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
603 def _prefixeWithFileName (self, strMsg):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
604 resStr = "File: " + self._currentFileName + "\n" + strMsg
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
605 return resStr
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
606
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
607
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
608 def _completeAlleleSetWithCurrentAllele(self, dAlleleFileResults, dnaBase):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
609 if dAlleleFileResults.has_key(dnaBase):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
610 return dAlleleFileResults
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
611 else:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
612 iAlleleNumber = len(dAlleleFileResults) + 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
613 dAlleleFileResults[dnaBase] = iAlleleNumber
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
614 return dAlleleFileResults
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
615
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _completeIndividualListWithCurrentIndividual(self, lIndividualResults, lineName):
    """Append an individual entry for lineName unless one with that name already exists.

    The new entry is numbered with the current list length plus one and
    carries self._taxon as scientific name. The list is updated in place and
    also returned.
    """
    # TODO: turn the list of dictionaries into a list of objects
    if self._checkIfALineExistInList(lIndividualResults, lineName):
        return lIndividualResults

    lIndividualResults.append({
        'individualNumber': len(lIndividualResults) + 1,
        'individualName': lineName,
        'scientificName': self._taxon,
    })
    return lIndividualResults
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
628
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _completeBatchLineListWithCurrentIndividual(self, lBatchLineResults, lIndividualResults, lineName):
    """Append a batch-line entry linking the individual named lineName to the current batch.

    The individual is looked up in lIndividualResults; its number (as a
    string) and self._currentBatchNumber form the new entry. The list is
    updated in place and also returned.

    Raises Exception (after logging the same message as an error) when the
    individual is not in the list or has no 'individualNumber' key.

    Uses the `in` operator instead of dict.has_key, which was removed in
    Python 3.
    """
    lineDict = self._getALineDictFromADictListWithALineName(lIndividualResults, lineName)

    # the lookup returns an empty dict when no individual has that name
    if len(lineDict) == 0:
        msg = "Problem with the batchLine results construction: individual named " + lineName + " not in individual list!"
        self._logFile.error(msg)
        raise Exception(msg)

    if 'individualNumber' not in lineDict:
        msg = "Problem with the batchLine results construction: individual named " + lineName + " has no individual number!"
        self._logFile.error(msg)
        raise Exception(msg)

    dResults2Add = {'IndividualNumber': str(lineDict['individualNumber']), 'BatchNumber': self._currentBatchNumber}
    lBatchLineResults.append(dResults2Add)
    return lBatchLineResults
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
647
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
648 def _getALineDictFromADictListWithALineName(self, lDictList, lineName):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
649 dictToReturn = {}
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
650 for myDict in lDictList:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
651 if myDict['individualName'] == lineName:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
652 dictToReturn = myDict
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
653
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
654 return dictToReturn
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
655
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
656 def _checkIfALineExistInList(self, lDictList, lineName):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
657 for myDict in lDictList:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
658 if myDict['individualName'] == lineName:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
659 return True
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
660 return False
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
661
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
662 def _getCurrentBatchResult(self):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
663 return self._lBatchFileResults[self._currentBatchNumber-1]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
664
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
665
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
666
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
667
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
class ReferenceBioseqAndLinesBioseqDBWrapper(object):
    """Hold a reference sequence together with the database of line sequences.

    On construction every sequence is checked: each line sequence must have
    the same length as the reference sequence.
    """

    def __init__(self, iReferenceBioseq, iLinesBioSeqDB, logger, fileName):
        """Store the reference, the lines database, the logger and the file name, then validate."""
        self._iReferenceBioseq = iReferenceBioseq
        self._iLinesBioseqDB = iLinesBioSeqDB
        self._logger = logger
        self._currentFileName = fileName
        self._checkAllSeqs()

    def _checkAllSeqs(self):
        """Call checkEOF on every sequence and verify all lengths equal the reference length.

        Raises Exception (after logging the same message as an error) on the
        first line sequence whose length differs.
        """
        self._iReferenceBioseq.checkEOF()
        refSeqLen = self._iReferenceBioseq.getLength()

        for lineSeq in self._iLinesBioseqDB.db:
            lineSeq.checkEOF()
            if lineSeq.getLength() == refSeqLen:
                continue
            msg = self._buildLengthMismatchMessage(lineSeq, refSeqLen)
            self._logger.error(msg)
            raise Exception(msg)

    def _buildLengthMismatchMessage(self, lineSeq, refSeqLen):
        """Build the error text reported when lineSeq is not as long as the reference."""
        lParts = [
            "File: " + self._currentFileName + ", problem with the sequence " + lineSeq.header + ": its length is different from the reference seq! All the sequences must have the same length.\n",
            "refseq length: " + str(refSeqLen) + "\n",
            "seq length: " + str(lineSeq.getLength()) + "\n",
        ]
        return "".join(lParts)

    def getLinesBioseqInstances(self):
        """Return the `db` attribute of the lines database."""
        return self._iLinesBioseqDB.db

    def getReferenceSequence(self):
        """Return the `sequence` attribute of the reference."""
        return self._iReferenceBioseq.sequence
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
696
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
697 class Multifasta2SNPFileWriter(object):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
698
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
# Column names of the SubSNP CSV file, in output order.
SUB_SNP_FILE_HEADER = [
    "SubSNPName", "ConfidenceValue", "Type", "Position",
    "5flank", "3flank", "Length",
    "BatchNumber", "IndividualNumber",
    "PrimerType", "PrimerNumber", "Forward_or_Reverse",
    "AlleleNumber",
]

# Column names of the allele CSV file.
ALLELE_FILE_HEADER = [
    "AlleleNumber", "Value", "Motif", "NbCopy", "Comment",
]

# Column names of the individual CSV file.
INDIVIDUAL_FILE_HEADER = [
    "IndividualNumber", "IndividualName", "Description", "AberrAneuploide",
    "FractionLength", "DeletionLineSynthesis", "UrlEarImage", "TypeLine",
    "ChromNumber", "ArmChrom", "DeletionBin", "ScientificName",
    "local_germplasm_name", "submitter_code", "local_institute",
    "donor_institute", "donor_acc_id",
]

# Column names of the sequence CSV file.
SEQUENCE_CSV_FILE_HEADER = [
    "SequenceName", "SeqType", "BankName", "BankVersion",
    "ACNumber", "Locus", "ScientificName",
]

# Field names written, one per line, for each record of the batch TXT file.
BATCH_TXT_FILE_HEADER = [
    "BatchNumber", "BatchName", "GeneName", "Description",
    "ContactNumber", "ProtocolNumber", "ThematicNumber",
    "RefSeqName", "AlignmentFileName", "SeqName",
]

# Column names of the batch-line CSV file.
BATCH_LINE_FILE_HEADER = [
    "IndividualNumber", "Pos5", "Pos3", "BatchNumber", "Sequence",
]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
713
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
714 def __init__(self):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
715 self._csvFieldSeparator = ";"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
716 self._txtSubFieldSeparator = ": "
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
717 self._txtFieldSeparator = "\n"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
718 self._primerType = "Sequence"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
719 self._csvLineSeparator = "\n"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
720 self._txtLineSeparator = "//\n"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
721
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def write(self, iMultifasta2SNPFile):
    """Write all output files from the results held by iMultifasta2SNPFile.

    Files are written in this order: SubSNP, allele, individual, sequence
    (FSA and CSV), batch, batch line. File names and results are read from
    the private attributes of iMultifasta2SNPFile.
    """
    src = iMultifasta2SNPFile
    self._writeSubSNPFile(src._outSubSNPFileName, src._lSubSNPFileResults)
    self._writeAlleleFile(src._outAlleleFileName, src._dAlleleFileResults)
    self._writeIndividualFile(src._outIndividualFileName, src._lIndividualFileResults)
    self._writeSequenceFiles(
        src._outSequenceFSAFileName,
        src._outSequenceCSVFileName,
        src._lRefSequences,
        src._taxon,
    )
    self._writeBatchFile(src._outBatchFileName, src._lBatchFileResults)
    self._writeBatchLineFile(src._outBatchLineFileName, src._lBatchLineFileResults)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
729
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def sortAlleleResultByAlleleNumber(self, dAlleleFileResults):
    """Return the (allele, number) pairs of the dict sorted by number, then by allele.

    The key function indexes the pair instead of unpacking it in the lambda
    signature: tuple parameters were removed in Python 3 (PEP 3113), while
    indexing works on both Python 2 and 3.
    """
    return sorted(dAlleleFileResults.items(), key=lambda tAllele: (tAllele[1], tAllele[0]))
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
732
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _writeSubSNPFile(self, subSNPFileName, lSNP2Write):
    """Write the SubSNP file: the header row, then one row per dict of lSNP2Write.

    The file is opened in a `with` block so it is closed even when writing a
    row raises.
    """
    with open(subSNPFileName, "w") as outF:
        self._writeSNPFileHeader(outF)
        for dSNP in lSNP2Write:
            self._writeSNPFileLine(outF, dSNP)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
739
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _writeAlleleFile(self, alleleFileName, dAllele2Write):
    """Write the allele file: the header row, then one row per allele ordered by allele number.

    The file is opened in a `with` block so it is closed even when sorting or
    writing raises.
    """
    with open(alleleFileName, "w") as outF:
        self._writeAlleleFileHeader(outF)
        for sAllele, iAlleleNumber in self.sortAlleleResultByAlleleNumber(dAllele2Write):
            self._writeAlleleFileLine(outF, sAllele, iAlleleNumber)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
748
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _writeIndividualFile(self, individualFileName, lIndividual2Write):
    """Write the individual file: the header row, then one row per individual ordered by number.

    sorted() returns a new list; the previous code discarded it and wrote the
    individuals in their incoming order. The sorted list is now the one that
    is written. The file is opened in a `with` block so it is closed even
    when writing a row raises.
    """
    lSortedIndividuals = sorted(lIndividual2Write, key=lambda dIndiv: dIndiv['individualNumber'])
    with open(individualFileName, "w") as outF:
        self._writeIndividualFileHeader(outF)
        for dIndiv in lSortedIndividuals:
            self._writeIndividualFileLine(outF, dIndiv)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
758
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _writeSequenceFiles(self, sequenceFSAFileName, sequenceCSVFileName, lRefSequences, taxon):
    """Write the reference sequences to the FSA file and their description to the CSV file.

    The CSV file starts with its header row. For each reference sequence,
    cleanGap() is called on it first, then it is written to both files.
    Both files are opened in `with` blocks so they are closed even when the
    second open or a write raises.
    """
    with open(sequenceFSAFileName, "w") as outFSA:
        with open(sequenceCSVFileName, "w") as outCSV:
            self._writeSequenceCSVHeader(outCSV)
            for refSeq in lRefSequences:
                # NOTE(review): presumably strips alignment gaps from refSeq itself
                # before output - confirm against Bioseq.cleanGap
                refSeq.cleanGap()
                self._writeSequenceFSAFile(outFSA, refSeq)
                self._writeSequenceCSVLine(outCSV, refSeq, taxon)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
771
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
772 def _writeSequenceFSAFile(self, outF, refSeq):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
773 outF.write( ">%s\n" % ( refSeq.header ) )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
774 outF.write( "%s\n" % ( refSeq.sequence[0:refSeq.getLength()] ) )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
775
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
776
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _writeBatchFile(self, batchFileName, lBatchResults):
    """Write the batch TXT file, one record per dict of lBatchResults.

    Each record lists every name of BATCH_TXT_FILE_HEADER as "name: value",
    one per line, with an empty value when the dict has no such key, and is
    closed by the TXT line separator.

    Uses the `in` operator instead of dict.has_key (removed in Python 3) and
    a `with` block so the file is closed even when a write raises.
    """
    with open(batchFileName, "w") as outF:
        for dBatchResults in lBatchResults:
            for head in Multifasta2SNPFileWriter.BATCH_TXT_FILE_HEADER:
                if head in dBatchResults:
                    value = str(dBatchResults[head])
                else:
                    value = ""
                outF.write(head + self._txtSubFieldSeparator + value + self._txtFieldSeparator)
            outF.write(self._txtLineSeparator)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
789
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _writeBatchLineFile(self, batchLineFileName, lBatchLineResults):
    """Write the batch-line file: the header row, then one row per dict of lBatchLineResults.

    The file is opened in a `with` block so it is closed even when writing a
    row raises.
    """
    with open(batchLineFileName, "w") as outF:
        self._writeBatchLineFileHeader(outF)
        for dResult in lBatchLineResults:
            self._writeBatchLineFileLine(outF, dResult)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
796
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _writeSNPFileHeader(self, outF):
    """Write the SubSNP header row: column names joined by the CSV field separator, then the CSV line separator."""
    lColumns = Multifasta2SNPFileWriter.SUB_SNP_FILE_HEADER
    outF.write(self._csvFieldSeparator.join(lColumns) + self._csvLineSeparator)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
801
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _writeAlleleFileHeader(self, outF):
    """Write the allele header row: column names joined by the CSV field separator, then the CSV line separator."""
    lColumns = Multifasta2SNPFileWriter.ALLELE_FILE_HEADER
    outF.write(self._csvFieldSeparator.join(lColumns) + self._csvLineSeparator)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
806
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _writeIndividualFileHeader(self, outF):
    """Write the individual header row: column names joined by the CSV field separator, then the CSV line separator."""
    lColumns = Multifasta2SNPFileWriter.INDIVIDUAL_FILE_HEADER
    outF.write(self._csvFieldSeparator.join(lColumns) + self._csvLineSeparator)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
811
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _writeSequenceCSVHeader(self, outF):
    """Write the sequence CSV header row: column names joined by the CSV field separator, then the CSV line separator."""
    lColumns = Multifasta2SNPFileWriter.SEQUENCE_CSV_FILE_HEADER
    outF.write(self._csvFieldSeparator.join(lColumns) + self._csvLineSeparator)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
816
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _writeBatchLineFileHeader(self, outF):
    """Write the batch-line header row: column names joined by the CSV field separator, then the CSV line separator."""
    lColumns = Multifasta2SNPFileWriter.BATCH_LINE_FILE_HEADER
    outF.write(self._csvFieldSeparator.join(lColumns) + self._csvLineSeparator)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
821
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _writeSNPFileLine(self, outF, dSNP):
    """Write one SNP record to ``outF`` as a single separated row.

    Field order: subSNPName, confidenceValue, type, position, 5flank, 3flank,
    length, batchNumber, lineName, ``self._primerType``, two empty fields,
    allele. Fields are joined with ``self._csvFieldSeparator`` and the row
    ends with ``self._csvLineSeparator``.

    outF -- writable file-like object; only its ``write`` method is used.
    dSNP -- dict holding the keys listed above. 'position', 'length',
            'batchNumber', 'lineName' and 'allele' are passed through
            ``str()``; the other values are used as-is and must already be
            strings.

    Raises KeyError when a key is missing and TypeError when an unconverted
    value is not a string. The row is built completely before anything is
    written, so a failure never leaves a truncated record in the output.
    """
    rowFields = [
        dSNP['subSNPName'],
        dSNP['confidenceValue'],
        dSNP['type'],
        str(dSNP['position']),
        dSNP['5flank'],
        dSNP['3flank'],
        str(dSNP['length']),
        str(dSNP['batchNumber']),
        str(dSNP['lineName']),
        self._primerType,
        "",  # two columns are always left empty between the
        "",  # primer type and the allele
        str(dSNP['allele']),
    ]
    outF.write(self._csvFieldSeparator.join(rowFields) + self._csvLineSeparator)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
829
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _writeAlleleFileLine(self, outF, sAllele2Write, iAlleleNumber):
    """Write one allele record to ``outF``.

    The row is: allele number, allele string, then three field separators
    (i.e. the remaining columns are left empty) and the line separator.

    outF          -- writable file-like object; only ``write`` is used.
    sAllele2Write -- allele text, used as-is (must be a string).
    iAlleleNumber -- allele identifier, converted with ``str()``.
    """
    sep = self._csvFieldSeparator
    emit = outF.write
    emit(str(iAlleleNumber) + sep)
    # Three consecutive separators close the allele column and leave the
    # trailing columns blank.
    emit(sAllele2Write + sep * 3 + self._csvLineSeparator)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
833
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _writeIndividualFileLine(self, outF, dIndividual):
    """Write one individual record to ``outF``.

    The row is: individual number, individual name followed by ten field
    separators, scientific name followed by five field separators, then the
    line separator. The columns between and after those values are empty.

    outF        -- writable file-like object; only ``write`` is used.
    dIndividual -- dict with keys 'individualNumber' (converted with
                   ``str()``), 'individualName' and 'scientificName' (both
                   used as-is, so they must be strings).
    """
    sep = self._csvFieldSeparator
    emit = outF.write
    emit(str(dIndividual['individualNumber']) + sep)
    # Ten separators: one closes the name column, nine leave columns blank.
    emit(dIndividual['individualName'] + sep * 10)
    # Five separators after the scientific name leave the trailing columns blank.
    emit(dIndividual['scientificName'] + sep * 5 + self._csvLineSeparator)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
838
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _writeSequenceCSVLine(self, outF, refSeq, taxon):
    """Write the sequence CSV row describing the reference sequence.

    The row is: ``refSeq.header``, the literal ``Reference`` followed by five
    field separators (the columns in between stay empty), ``taxon``, then the
    line separator.

    outF   -- writable file-like object; only ``write`` is used.
    refSeq -- object exposing a string ``header`` attribute.
    taxon  -- taxon text, used as-is (must be a string).
    """
    sep = self._csvFieldSeparator
    emit = outF.write
    emit(refSeq.header + sep)
    emit("Reference" + sep * 5)
    emit(taxon + self._csvLineSeparator)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
843
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
def _writeBatchLineFileLine(self, outF, dResult):
    """Write one batch-line record to ``outF``.

    The row is: individual number followed by three field separators, batch
    number followed by one field separator, then the line separator.

    outF    -- writable file-like object; only ``write`` is used.
    dResult -- dict with keys 'IndividualNumber' and 'BatchNumber'; both
               values are converted with ``str()``.
    """
    sep = self._csvFieldSeparator
    emit = outF.write
    emit(str(dResult['IndividualNumber']) + sep * 3)
    emit(str(dResult['BatchNumber']) + sep + self._csvLineSeparator)