Mercurial > repos > yufei-luo > s_mart
comparison commons/core/parsing/Multifasta2SNPFile.py @ 6:769e306b7933
Change the repository level.
| author | yufei-luo |
|---|---|
| date | Fri, 18 Jan 2013 04:54:14 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 5:ea3082881bf8 | 6:769e306b7933 |
|---|---|
| 1 import re | |
| 2 import os | |
| 3 import logging | |
| 4 from commons.core.utils.FileUtils import FileUtils | |
| 5 from commons.core.seq.BioseqDB import BioseqDB | |
| 6 from commons.core.seq.Bioseq import Bioseq | |
| 7 from commons.core.LoggerFactory import LoggerFactory | |
| 8 | |
| 9 DNA_ALPHABET_WITH_N_AND_DELS = set (['A','T','G','C','N','-']) | |
| 10 IUPAC = set(['A','T','G','C','U','R','Y','M','K','W','S','B','D','H','V','N', '-', 'a','t','g','c','u','r','y','m','k','w','s','b','d','h','v','n']) | |
| 11 | |
| 12 class Multifasta2SNPFile( object ): | |
| 13 | |
| 14 POLYM_TYPE_4_SNP = "SNP" | |
| 15 POLYM_TYPE_4_INSERTION = "INSERTION" | |
| 16 POLYM_TYPE_4_DELETION = "DELETION" | |
| 17 POLYM_DEFAULT_CONFIDENCE_VALUE = "A" | |
| 18 SNP_LENGTH = 1 | |
| 19 FLANK_LENGTH = 250 | |
| 20 | |
| 21 def __init__(self, taxon, batchName="", geneName=""): | |
| 22 | |
| 23 if(batchName): | |
| 24 self._batchName = batchName | |
| 25 | |
| 26 if(geneName): | |
| 27 self._geneName = geneName | |
| 28 | |
| 29 self._taxon = taxon | |
| 30 self._outSubSNPFileName = "SubSNP.csv" | |
| 31 self._outAlleleFileName = "Allele.csv" | |
| 32 self._outIndividualFileName = "Individual.csv" | |
| 33 self._outSequenceFSAFileName = "Sequences.fsa" | |
| 34 self._outSequenceCSVFileName = "Sequences.csv" | |
| 35 self._outBatchFileName = "Batch.txt" | |
| 36 self._outBatchLineFileName = "BatchLine.csv" | |
| 37 self._logFileName = "multifasta2SNP.log" | |
| 38 | |
| 39 self._lBatchFileResults = [] | |
| 40 self._lSubSNPFileResults = [] | |
| 41 self._lRefSequences = [] | |
| 42 self._lIndividualFileResults = [] | |
| 43 self._lBatchLineFileResults = [] | |
| 44 self._dIndividualNumbers4SubSNPResults = {} | |
| 45 self._dAlleleFileResults = {} | |
| 46 | |
| 47 | |
| 48 self.dcurrentIndel = {} | |
| 49 self.lIndelsOfTheCurrentLine = [] | |
| 50 self.lIndelsOverAllLines = [] | |
| 51 self.dSNPsPositions = {} | |
| 52 | |
| 53 self._iCurrentLineNumber = 0 | |
| 54 self._currentBatchNumber = 1 | |
| 55 self.currentLineName = "" | |
| 56 self.currentNucleotide = "" | |
| 57 self.currentPosition = 0 | |
| 58 self._sPolymConfidenceValue = Multifasta2SNPFile.POLYM_DEFAULT_CONFIDENCE_VALUE | |
| 59 self._sPolymType = Multifasta2SNPFile.POLYM_TYPE_4_SNP | |
| 60 self._iPolymLength = Multifasta2SNPFile.SNP_LENGTH | |
| 61 self._fileUtils = FileUtils() | |
| 62 | |
| 63 if self._fileUtils.isRessourceExists(self._logFileName): | |
| 64 os.remove(self._logFileName) | |
| 65 self._logFile = LoggerFactory.createLogger(self._logFileName, logging.INFO, "%(asctime)s %(levelname)s: %(message)s") | |
| 66 | |
| 67 def runOneBatch( self, inFileName): | |
| 68 self._currentFileName = inFileName | |
| 69 #TODO: methode a virer; n'utiliser au final que runOneBatchWithoutWriting | |
| 70 self._wrapper = self.createWrapperFromFile(inFileName) | |
| 71 self._lBatchFileResults = self.completeBatchList() | |
| 72 self.detectSNPsAndIndels(self._wrapper) | |
| 73 self._writeAllOutputFiles() | |
| 74 self._currentBatchNumber += 1 | |
| 75 | |
| 76 def runOneBatchWithoutWriting( self, inFileName): | |
| 77 self.lIndelsOverAllLines = [] | |
| 78 self._currentFileName = inFileName | |
| 79 self._wrapper = self.createWrapperFromFile(inFileName) | |
| 80 self._lBatchFileResults = self.completeBatchList() | |
| 81 self.detectSNPsAndIndels(self._wrapper) | |
| 82 self._currentBatchNumber += 1 | |
| 83 | |
| 84 | |
| 85 def _cleanOutputsInTheCurrentDir(self): | |
| 86 #TODO: create a list of files to be deleted | |
| 87 FileUtils.removeFilesByPattern("*.csv") | |
| 88 if (FileUtils.isRessourceExists(self._outBatchFileName)): | |
| 89 os.remove(self._outBatchFileName) | |
| 90 if (FileUtils.isRessourceExists(self._outSequenceFSAFileName)): | |
| 91 os.remove(self._outSequenceFSAFileName) | |
| 92 | |
| 93 | |
| 94 def _createOutputObjectsIteratingOnCurrentDir(self): | |
| 95 #TODO: gerer les extensions multiples | |
| 96 extList = [".fasta", ".fsa"] | |
| 97 for dirname, dirnames, filenames in os.walk("."): | |
| 98 filenames.sort() | |
| 99 for filename in filenames: | |
| 100 if os.path.splitext(filename)[1] in extList: | |
| 101 self._geneName = os.path.splitext(filename)[0] | |
| 102 self._batchName = "Batch_" + self._geneName | |
| 103 self.runOneBatchWithoutWriting(filename) | |
| 104 | |
| 105 def runSeveralBatches( self, inputDir): | |
| 106 #TODO: enlever les chdirs, appeler les fichiers en absolu et modifier les tests en consequences | |
| 107 os.chdir(inputDir) | |
| 108 self._cleanOutputsInTheCurrentDir() | |
| 109 self._createOutputObjectsIteratingOnCurrentDir() | |
| 110 self._writeAllOutputFiles() | |
| 111 os.chdir("../") | |
| 112 | |
| 113 | |
| 114 def _treatADeletionClosingWithAnotherBaseThanRefSeq(self, lineName, nucleotide, position): | |
| 115 if (self.isTheIndelOpen4ThisLine): | |
| 116 self._closeTheCurrentIndel(lineName, nucleotide, position) | |
| 117 self._manageSNPs(lineName, nucleotide, position) | |
| 118 self.addOnePolymorphicPosition(position) | |
| 119 | |
| 120 def _treatNucleotideDifferentThanRefSeqCase(self, refSeq, lineName, index, nucleotide, position): | |
| 121 if (nucleotide == "-" or refSeq[index] == "-"): | |
| 122 if (self.isTheIndelOpen4ThisLine): | |
| 123 self._expandTheCurrentIndel(position, nucleotide) | |
| 124 else: | |
| 125 self._startAnIndel(position, nucleotide) | |
| 126 else: | |
| 127 self._treatADeletionClosingWithAnotherBaseThanRefSeq(lineName, nucleotide, position) | |
| 128 | |
| 129 | |
| 130 def _treatSameNucleotideInOneIndel(self, refSeq, lineName, index, nucleotide, position): | |
| 131 if (self._sPolymType == Multifasta2SNPFile.POLYM_TYPE_4_DELETION): | |
| 132 self._closeTheCurrentIndel(lineName, nucleotide, position) | |
| 133 elif (self._sPolymType == Multifasta2SNPFile.POLYM_TYPE_4_INSERTION): | |
| 134 if (refSeq[index] == "-"): | |
| 135 self._expandTheCurrentIndel(position, nucleotide) | |
| 136 else: | |
| 137 self._closeTheCurrentIndel(lineName, nucleotide, position) | |
| 138 | |
| 139 def detectSNPsAndIndels(self, iRefAndLines): | |
| 140 refSeq = iRefAndLines.getReferenceSequence() | |
| 141 refSeqLength = len ( refSeq ) | |
| 142 self.dSNPsPositions = {} | |
| 143 | |
| 144 for iLineBioseq in iRefAndLines.getLinesBioseqInstances(): | |
| 145 lineSequence = iLineBioseq.sequence | |
| 146 self.currentLineName = iLineBioseq.header | |
| 147 self._manageCurrentIndividual(self.currentLineName) | |
| 148 | |
| 149 index = 0 | |
| 150 self.isTheIndelOpen4ThisLine = 0 | |
| 151 self.lIndelsOfTheCurrentLine = [] | |
| 152 for nucleotide in lineSequence: | |
| 153 position = index + 1 | |
| 154 if (index < refSeqLength) and self._isSNPDetected(refSeq, index, nucleotide): | |
| 155 self._treatNucleotideDifferentThanRefSeqCase(refSeq, self.currentLineName, index, nucleotide, position) | |
| 156 elif(index < refSeqLength and self.isTheIndelOpen4ThisLine) : | |
| 157 self._treatSameNucleotideInOneIndel(refSeq, self.currentLineName, index, nucleotide, position) | |
| 158 index = index + 1 | |
| 159 self.currentNucleotide = nucleotide | |
| 160 self.currentPosition = position | |
| 161 | |
| 162 self.lIndelsOverAllLines = self.lIndelsOverAllLines + self.lIndelsOfTheCurrentLine | |
| 163 | |
| 164 self._postTraitementDetectSNP(self.currentLineName, self.currentNucleotide, self.currentPosition) | |
| 165 | |
| 166 def _manageCurrentIndividual(self, lineName): | |
| 167 self._lIndividualFileResults = self._completeIndividualListWithCurrentIndividual(self._lIndividualFileResults, lineName) | |
| 168 self._lBatchLineFileResults = self._completeBatchLineListWithCurrentIndividual(self._lBatchLineFileResults, self._lIndividualFileResults, lineName) | |
| 169 if not self._dIndividualNumbers4SubSNPResults.__contains__(lineName): | |
| 170 self._dIndividualNumbers4SubSNPResults[lineName] = len(self._lIndividualFileResults) | |
| 171 | |
| 172 | |
| 173 def _manageLastPositionIndels(self, lineName, nucleotide, position): | |
| 174 if (self.isTheIndelOpen4ThisLine): | |
| 175 self._closeTheCurrentIndel(lineName, nucleotide, position) | |
| 176 self.lIndelsOverAllLines.append(self.lIndelsOfTheCurrentLine.pop()) | |
| 177 | |
| 178 def _postTraitementDetectSNP(self, lineName, nucleotide, position): | |
| 179 self._manageLastPositionIndels(lineName, nucleotide, position) | |
| 180 | |
| 181 self._mergeAllelesAndSubSNPsFromOverlappingIndels() | |
| 182 self._addMissingsAllelesAndSubSNPs() | |
| 183 | |
| 184 self._lSubSNPFileResults = self._sortSubSNPResultByBatchPositionAndLineName(self._lSubSNPFileResults) | |
| 185 | |
| 186 def _manageSNPs(self, lineName, nucleotide, position): | |
| 187 self._dAlleleFileResults = self._completeAlleleSetWithCurrentAllele(self._dAlleleFileResults, nucleotide) | |
| 188 truePosition = self.getUngappedPositionInRefSeq(position) | |
| 189 subSNPName = self._formatSubSNPName(lineName, truePosition, Multifasta2SNPFile.POLYM_TYPE_4_SNP) | |
| 190 iAlleleNumber = self._dAlleleFileResults[nucleotide] | |
| 191 self._sPolymType = Multifasta2SNPFile.POLYM_TYPE_4_SNP | |
| 192 flank5Prime, flank3Prime = self.getFlanksOfASubSNP(lineName, position, Multifasta2SNPFile.SNP_LENGTH, Multifasta2SNPFile.FLANK_LENGTH) | |
| 193 dSubSNPResult = {'subSNPName':subSNPName, 'position':truePosition, 'lineName':self._dIndividualNumbers4SubSNPResults[lineName], | |
| 194 'allele':iAlleleNumber, 'batchNumber': self._currentBatchNumber, 'confidenceValue':self._sPolymConfidenceValue, | |
| 195 'type':self._sPolymType, 'length': Multifasta2SNPFile.SNP_LENGTH, | |
| 196 '5flank':flank5Prime, '3flank':flank3Prime} | |
| 197 if(not self.subSNPExistsInSubSNPList(dSubSNPResult, self._lSubSNPFileResults)): | |
| 198 self._lSubSNPFileResults.append(dSubSNPResult) | |
| 199 | |
| 200 def _startAnIndel(self, position, nucleotide): | |
| 201 self.dcurrentIndel['start'] = position | |
| 202 self.dcurrentIndel['end'] = position | |
| 203 self.sCurrentIndelAllele = nucleotide | |
| 204 if(nucleotide == "-"): | |
| 205 self._sPolymType = Multifasta2SNPFile.POLYM_TYPE_4_DELETION | |
| 206 else: | |
| 207 self._sPolymType = Multifasta2SNPFile.POLYM_TYPE_4_INSERTION | |
| 208 self.isTheIndelOpen4ThisLine = 1 | |
| 209 | |
| 210 def _expandTheCurrentIndel(self, position, nucleotide): | |
| 211 self.sCurrentIndelAllele = self.sCurrentIndelAllele + nucleotide | |
| 212 self.dcurrentIndel['end'] = position | |
| 213 | |
| 214 def _closeTheCurrentIndel(self, lineName, nucleotide, position): | |
| 215 subSNPName = self._formatSubSNPName(lineName, self.dcurrentIndel['start'], self._sPolymType) | |
| 216 | |
| 217 dIndel4TheLine = {'name': subSNPName, 'lineName': lineName, 'start': self.dcurrentIndel['start'],'end' :self.dcurrentIndel['end'], | |
| 218 'allele': self.sCurrentIndelAllele, 'type': self._sPolymType, 'length': self._iPolymLength} | |
| 219 | |
| 220 dIndel4TheLine['length'] = self.getAnIndelLength(dIndel4TheLine) | |
| 221 | |
| 222 self.lIndelsOfTheCurrentLine.append(dIndel4TheLine) | |
| 223 | |
| 224 self.dcurrentIndel.clear() | |
| 225 self.isTheIndelOpen4ThisLine = 0 | |
| 226 | |
| 227 | |
| 228 def _mergeAllelesAndSubSNPsFromOverlappingIndels(self): | |
| 229 lIndelList = [] | |
| 230 for dIndel in self.lIndelsOverAllLines: | |
| 231 lIndelList = self.clusteriseIndels(dIndel, self.lIndelsOverAllLines) | |
| 232 | |
| 233 for dIndel in lIndelList: | |
| 234 oldAllele = dIndel['allele'] | |
| 235 start = dIndel['start'] | |
| 236 stop = dIndel['end'] | |
| 237 lineName = dIndel['lineName'] | |
| 238 | |
| 239 LineBioSeq = self._wrapper._iLinesBioseqDB.fetch(lineName) | |
| 240 dIndel = self.updateAllele(oldAllele, start, stop, LineBioSeq, dIndel) | |
| 241 dSubSNPResult = self.createSubSNPFromAMissingPolym(dIndel, lineName) | |
| 242 if(not self.subSNPExistsInSubSNPList(dSubSNPResult, self._lSubSNPFileResults)): | |
| 243 self._lSubSNPFileResults.append(dSubSNPResult) | |
| 244 | |
| 245 def updateAllele(self, oldAllele, start, stop, LineBioSeq, dIndel): | |
| 246 #TODO: creer le test | |
| 247 newAllele = LineBioSeq.subseq(start, stop).sequence | |
| 248 if newAllele != oldAllele: | |
| 249 dIndel['allele'] = newAllele | |
| 250 self._dAlleleFileResults = self._completeAlleleSetWithCurrentAllele(self._dAlleleFileResults, newAllele) | |
| 251 return dIndel | |
| 252 | |
| 253 | |
| 254 def getFlanksOfASubSNP(self, lineName, subsnpPosition, polymLength, flankLength): | |
| 255 bioSeqOfTheLine = self._wrapper._iLinesBioseqDB.fetch(lineName) | |
| 256 flank5Prime = bioSeqOfTheLine.get5PrimeFlank(subsnpPosition, flankLength) | |
| 257 flank3Prime = bioSeqOfTheLine.get3PrimeFlank(subsnpPosition, flankLength, polymLength) | |
| 258 | |
| 259 return flank5Prime, flank3Prime | |
| 260 | |
| 261 def createSubSNPFromAMissingPolym(self, dIndel, lineName): | |
| 262 if(dIndel['type'] == Multifasta2SNPFile.POLYM_TYPE_4_INSERTION): | |
| 263 start = self.getUngappedPositionInRefSeq(dIndel['start']-1) | |
| 264 else: | |
| 265 start = self.getUngappedPositionInRefSeq(dIndel['start']) | |
| 266 | |
| 267 subSNPName = self._formatSubSNPName(dIndel['lineName'], start, dIndel['type']) | |
| 268 | |
| 269 iAlleleNumber = self._dAlleleFileResults[dIndel['allele']] | |
| 270 | |
| 271 iPolymLength = self.getAnIndelLength(dIndel) | |
| 272 | |
| 273 flank5Prime, flank3Prime = self.getFlanksOfASubSNP(lineName, dIndel['start'], iPolymLength, Multifasta2SNPFile.FLANK_LENGTH) | |
| 274 | |
| 275 dSubSNPResult = {'subSNPName':subSNPName, 'position':start, 'lineName':self._dIndividualNumbers4SubSNPResults[lineName], 'allele':iAlleleNumber, | |
| 276 'batchNumber': self._currentBatchNumber, 'confidenceValue':self._sPolymConfidenceValue, 'type':dIndel['type'], | |
| 277 'length': iPolymLength, '5flank':flank5Prime, '3flank':flank3Prime} | |
| 278 | |
| 279 return dSubSNPResult | |
| 280 | |
| 281 def clusteriseIndels(self, dIndel, lIndelsOverAllLines): | |
| 282 iIndice = 0 | |
| 283 for dIndel in lIndelsOverAllLines: | |
| 284 iIndice2Compare = 0 | |
| 285 for dIndel2Compare in lIndelsOverAllLines: | |
| 286 dIndel, dIndel2Compare = self.mergeBoundsForTwoOverlappingIndels(dIndel, dIndel2Compare) | |
| 287 lIndelsOverAllLines = self.updateBoundsForAnIndelInAnIndelList(lIndelsOverAllLines, dIndel) | |
| 288 lIndelsOverAllLines = self.updateBoundsForAnIndelInAnIndelList(lIndelsOverAllLines, dIndel2Compare) | |
| 289 iIndice2Compare = iIndice2Compare + 1 | |
| 290 iIndice = iIndice + 1 | |
| 291 | |
| 292 return lIndelsOverAllLines | |
| 293 | |
| 294 def mergeBoundsForTwoOverlappingIndels(self, dIndel1, dIndel2): | |
| 295 if((dIndel2['start'] <= dIndel1['start']) and (dIndel2['end'] >= dIndel1['start']) or | |
| 296 (dIndel1['start'] <= dIndel2['start']) and (dIndel1['end'] >= dIndel2['start'])): | |
| 297 if(dIndel1['start'] <= dIndel2['start']): | |
| 298 iStart = dIndel1['start'] | |
| 299 else: | |
| 300 iStart = dIndel2['start'] | |
| 301 | |
| 302 if(dIndel1['end'] >= dIndel2['end']): | |
| 303 iEnd = dIndel1['end'] | |
| 304 else: | |
| 305 iEnd = dIndel2['end'] | |
| 306 | |
| 307 dIndel1['start'] = iStart | |
| 308 dIndel1['end'] = iEnd | |
| 309 dIndel2['start'] = iStart | |
| 310 dIndel2['end'] = iEnd | |
| 311 | |
| 312 return dIndel1, dIndel2 | |
| 313 | |
| 314 def updateBoundsForAnIndelInAnIndelList(self, lIndelsList, dIndelWithNewBounds): | |
| 315 name = dIndelWithNewBounds['name'] | |
| 316 dIndelInTheList, iIndice = self.findAnIndelInAListWithHisName(name, lIndelsList) | |
| 317 lIndelsList.remove(dIndelInTheList) | |
| 318 lIndelsList.insert(iIndice, dIndelWithNewBounds) | |
| 319 return lIndelsList | |
| 320 | |
| 321 | |
| 322 def findASubSNPInAListWithHisName(self, name, lSubSNPList): | |
| 323 dSubSNP2Find = {} | |
| 324 indice = 0 | |
| 325 indice2Find = -1 | |
| 326 for dSubSNP in lSubSNPList: | |
| 327 if(dSubSNP['subSNPName'] == name): | |
| 328 dSubSNP2Find = dSubSNP | |
| 329 indice2Find = indice | |
| 330 indice = indice + 1 | |
| 331 | |
| 332 if dSubSNP2Find == {} or indice2Find == -1: | |
| 333 msg = "trying to find a SubSNP not existing: " + name | |
| 334 self._logFile.error(msg) | |
| 335 raise Exception ("trying to find a SubSNP not existing: " + name) | |
| 336 else: | |
| 337 return dSubSNP2Find, indice2Find | |
| 338 | |
| 339 def subSNPExistsInSubSNPList(self, dSubSNP2Find, lSubSNPList): | |
| 340 flag = 0 | |
| 341 for dSubSNP in lSubSNPList: | |
| 342 if(dSubSNP2Find['subSNPName'] == dSubSNP['subSNPName']): | |
| 343 flag = 1 | |
| 344 | |
| 345 if flag == 1: | |
| 346 return True | |
| 347 else: | |
| 348 return False | |
| 349 | |
| 350 | |
| 351 def findAnIndelInAListWithHisName(self, name, lIndelList): | |
| 352 dIndel2Find = {} | |
| 353 indice = 0 | |
| 354 indice2Find = -1 | |
| 355 for dIndel in lIndelList: | |
| 356 if(dIndel['name'] == name): | |
| 357 dIndel2Find = dIndel | |
| 358 indice2Find = indice | |
| 359 indice = indice + 1 | |
| 360 | |
| 361 if dIndel2Find == {} or indice2Find == -1: | |
| 362 msg = "trying to find an indel not existing: " + name | |
| 363 self._logFile.error(msg) | |
| 364 raise Exception (msg) | |
| 365 else: | |
| 366 return dIndel2Find, indice2Find | |
| 367 | |
| 368 def _addMissingsAllelesAndSubSNPs(self): | |
| 369 for dIndel in self.lIndelsOverAllLines: | |
| 370 start = dIndel['start'] | |
| 371 end = dIndel['end'] | |
| 372 type = dIndel['type'] | |
| 373 self.addMissingAllelesAndSubSNPsForOnePolym(start, end, type) | |
| 374 | |
| 375 for position in self.dSNPsPositions: | |
| 376 self.addMissingAllelesAndSubSNPsForOnePolym(position, position, "SNP") | |
| 377 | |
| 378 def addMissingAllelesAndSubSNPsForOnePolym(self, start, end, polymType): | |
| 379 refSeqAllele = self._wrapper._iReferenceBioseq.subseq(start, end).sequence | |
| 380 BioSeqDb = self._wrapper.getLinesBioseqInstances() | |
| 381 lBioSeqDbAlleles = self.getAllelesOfASubSeq(BioSeqDb, start, end) | |
| 382 for subSequence in lBioSeqDbAlleles: | |
| 383 if(subSequence['allele'] == refSeqAllele): | |
| 384 lineName = subSequence['header'] | |
| 385 dMissingPolym = {'lineName': lineName, 'start': start,'end' :end, | |
| 386 'allele': subSequence['allele'], 'type':polymType} | |
| 387 self._dAlleleFileResults = self._completeAlleleSetWithCurrentAllele(self._dAlleleFileResults, subSequence['allele']) | |
| 388 dSubSNPResult = self.createSubSNPFromAMissingPolym(dMissingPolym, lineName) | |
| 389 if(not self.subSNPExistsInSubSNPList(dSubSNPResult, self._lSubSNPFileResults)): | |
| 390 self._lSubSNPFileResults.append(dSubSNPResult) | |
| 391 | |
| 392 def addOnePolymorphicPosition(self, position): | |
| 393 if(not self.dSNPsPositions.has_key(position)): | |
| 394 self.dSNPsPositions[position] = 1 | |
| 395 | |
| 396 def getUngappedPositionInRefSeq(self, position): | |
| 397 if(position ==1): | |
| 398 nbOfGaps = 0 | |
| 399 else: | |
| 400 seqIn5Prime = self._wrapper._iReferenceBioseq.subseq(1, position-1).sequence | |
| 401 nbOfGaps = seqIn5Prime.count("-") | |
| 402 | |
| 403 return position - nbOfGaps | |
| 404 | |
| 405 def getAllelesOfASubSeq(self, BioSeqDb, start, end): | |
| 406 lAlleles = [] | |
| 407 for iBioSeq in BioSeqDb: | |
| 408 dAlleles = {} | |
| 409 dAlleles['header'] = iBioSeq.header | |
| 410 dAlleles['allele'] = iBioSeq.subseq(start, end).sequence | |
| 411 lAlleles.append(dAlleles) | |
| 412 | |
| 413 return lAlleles | |
| 414 | |
| 415 def getAnIndelLength(self, dIndel): | |
| 416 length = 0 | |
| 417 if(dIndel['type'] == Multifasta2SNPFile.POLYM_TYPE_4_DELETION): | |
| 418 length = dIndel['end'] - dIndel['start'] + 1 | |
| 419 else: | |
| 420 length = len(dIndel['allele']) | |
| 421 | |
| 422 return length | |
| 423 | |
| 424 def createWrapperFromFile(self, inFileName): | |
| 425 faF = open(inFileName, "r") | |
| 426 iBioSeqDB = self._extractSequencesFromInputFile(faF) | |
| 427 faF.close() | |
| 428 | |
| 429 iBioSeqDB.upCase() | |
| 430 referenceBioseq = iBioSeqDB[0] | |
| 431 linesBioSeqDB = iBioSeqDB.extractPart(1, iBioSeqDB.getSize() - 1) | |
| 432 | |
| 433 try: | |
| 434 if(FileUtils.isEmpty(inFileName)): | |
| 435 msg = "The input file is empty!" | |
| 436 self._logFile.error(self._prefixeWithLineNumber (msg)) | |
| 437 raise Exception (self._prefixeWithFileName (msg)) | |
| 438 if(self.isHeaderInRefSeqList(referenceBioseq.header)): | |
| 439 msg = "This reference sequence already exists in one previous file!" | |
| 440 self._logFile.error(self._prefixeWithLineNumber (msg)) | |
| 441 raise Exception (self._prefixeWithLineNumber (msg)) | |
| 442 except Exception, e : | |
| 443 raise Exception ("Problem with one input file: \n" + str(e)) | |
| 444 | |
| 445 self._lRefSequences.append(referenceBioseq) | |
| 446 | |
| 447 return ReferenceBioseqAndLinesBioseqDBWrapper(referenceBioseq, linesBioSeqDB, self._logFile, inFileName) | |
| 448 | |
| 449 def isHeaderInRefSeqList(self, header): | |
| 450 isHeader = False | |
| 451 for item in self._lRefSequences: | |
| 452 if item.header == header: | |
| 453 isHeader = True | |
| 454 return isHeader | |
| 455 | |
| 456 def completeBatchList(self): | |
| 457 dBatchResults = {'BatchNumber' : self._currentBatchNumber, 'BatchName' : self._batchName, 'GeneName' : self._geneName,'ContactNumber' : "1", | |
| 458 'ProtocolNumber' : "1", 'ThematicNumber' : "1", 'RefSeqName': self._wrapper._iReferenceBioseq.header} | |
| 459 | |
| 460 self._lBatchFileResults.append(dBatchResults) | |
| 461 | |
| 462 return self._lBatchFileResults | |
| 463 | |
| 464 def getLineAsAHeader(self, lineToBeCheck, lineNumber = 0): | |
| 465 """ | |
| 466 header line begin with the tag(or token) '>' tag | |
| 467 ended with an carriage return | |
| 468 contain The name of sequence must respect this alphabet [a-zA-Z0-9_-:] | |
| 469 """ | |
| 470 obsHeader = lineToBeCheck | |
| 471 if obsHeader[0]!=">" : | |
| 472 msg = "tag '>' omitted before header" | |
| 473 self._logFile.error(self._prefixeWithLineNumber (msg)) | |
| 474 raise Exception (self._prefixeWithLineNumber (msg)) | |
| 475 else : | |
| 476 obsHeader = obsHeader[1:] | |
| 477 obsHeader = obsHeader.replace ("\n","") | |
| 478 obsHeader = self._removeRepeatedBlanksInAStr(obsHeader) | |
| 479 obsHeader = self._replaceBlankByUnderScoreInAStr(obsHeader) | |
| 480 if self.checkHeaderAlphabet(obsHeader) : | |
| 481 return obsHeader | |
| 482 self._logFile.error(self._prefixeWithLineNumber ("fatal error on header")) | |
| 483 raise Exception (self._prefixeWithLineNumber ("fatal error on header")) | |
| 484 | |
| 485 def getLineAsASeq(self, lineToBeCheck): | |
| 486 """ | |
| 487 Sequence line | |
| 488 ended with an carriage return | |
| 489 contain only character of the IUPAC alphabet | |
| 490 """ | |
| 491 obsSeq = str.upper(lineToBeCheck) | |
| 492 obsSeq = obsSeq.replace ("\n","") | |
| 493 obsSeq = obsSeq.replace ("\r","") | |
| 494 obsLine = obsSeq.replace("-","") | |
| 495 if not self.isIUPAC_bases(obsLine) : | |
| 496 msg = "the sequence contain a non nucleic character " | |
| 497 self._logFile.error(self._prefixeWithLineNumber (msg)) | |
| 498 raise Exception (self._prefixeWithLineNumber (msg)) | |
| 499 return obsSeq | |
| 500 | |
| 501 def checkHeaderAlphabet( self, strToCheck): | |
| 502 """ | |
| 503 Check the string | |
| 504 the string is not a header when founding a pattern not corresponding to the regexp | |
| 505 \W Matches any non-alphanumeric character; this is equivalent to the class [^a-zA-Z0-9_-:]. | |
| 506 """ | |
| 507 if strToCheck=="": | |
| 508 return False | |
| 509 p = re.compile('[^a-zA-Z0-9_:\-]', re.IGNORECASE) #p = re.compile('(\W|-|:)+', re.IGNORECASE) | |
| 510 errList=p.findall(strToCheck) | |
| 511 if len( errList ) > 0 : | |
| 512 return False | |
| 513 else: | |
| 514 return True | |
| 515 | |
| 516 ## Check the string is nucleotides sequence from the DNA_ALPHABET_WITH_N = ["A","T","G","C","N"] of IUPAC nomenclature. | |
| 517 # @return True if sequence contain A, T, G, C or N False otherwise | |
| 518 # | |
| 519 def isDNA_bases( self, sequence): | |
| 520 if sequence == "" : | |
| 521 return False | |
| 522 | |
| 523 setFromString = set() | |
| 524 | |
| 525 for nt in sequence : | |
| 526 setFromString.add(nt) | |
| 527 | |
| 528 return setFromString.issubset(DNA_ALPHABET_WITH_N_AND_DELS) | |
| 529 | |
| 530 ## Check if the string is nucleotides sequence from the IUPAC ALPHABET . | |
| 531 # @return True if sequence contain IUPAC letters False otherwise | |
| 532 # | |
| 533 def isIUPAC_bases( self, sequence): | |
| 534 if sequence == "" : | |
| 535 return False | |
| 536 | |
| 537 setFromString = set() | |
| 538 | |
| 539 for nt in sequence : | |
| 540 setFromString.add(nt) | |
| 541 | |
| 542 return setFromString.issubset(IUPAC) | |
| 543 | |
| 544 def _writeAllOutputFiles(self): | |
| 545 writer = Multifasta2SNPFileWriter() | |
| 546 writer.write(self) | |
| 547 | |
| 548 def _sortSubSNPResultByBatchPositionAndLineName(self, lSubSNPResults): | |
| 549 return sorted(lSubSNPResults, key=lambda SNPresults: (SNPresults['batchNumber'], SNPresults['position'], SNPresults['lineName'])) | |
| 550 | |
| 551 def _formatSubSNPName(self, currentLineHeader, position, polymType): | |
| 552 shortPolymType = polymType[:3] | |
| 553 return self._batchName + "_" + shortPolymType + "_" + str(position) + "_" + currentLineHeader | |
| 554 | |
| 555 def _isSNPDetected(self, referenceSequence, index, nt): | |
| 556 if((nt != referenceSequence[index]) and (nt.upper() != "N") and (referenceSequence[index].upper() != "N")): | |
| 557 return True | |
| 558 else: | |
| 559 return False | |
| 560 | |
| 561 def _extractSequencesFromInputFile(self, inFile): | |
| 562 # attention : DNA_ALPHABET_WITH_N_AND_DELS = Set (['A','T','G','C','N']) no including "gap" | |
| 563 lInFileLines = inFile.readlines() | |
| 564 nbOfLines = len(lInFileLines) - 1 | |
| 565 #premiere lecture | |
| 566 self._iCurrentLineNumber = 0 | |
| 567 isSameSeq = False | |
| 568 newSeq = "" | |
| 569 bioseqDB = BioseqDB () | |
| 570 while self._iCurrentLineNumber < nbOfLines : | |
| 571 bioseq = Bioseq() | |
| 572 bioseq.header = self.getLineAsAHeader( lInFileLines[self._iCurrentLineNumber] ) | |
| 573 isSameSeq = True | |
| 574 while isSameSeq and (self._iCurrentLineNumber < nbOfLines) : | |
| 575 self._iCurrentLineNumber +=1 | |
| 576 if lInFileLines[self._iCurrentLineNumber][0] == ">" : | |
| 577 isSameSeq = False | |
| 578 else : | |
| 579 newSeq = newSeq + self.getLineAsASeq( lInFileLines[self._iCurrentLineNumber] ) | |
| 580 isSameSeq = True | |
| 581 bioseq.setSequence(newSeq) | |
| 582 newSeq = "" | |
| 583 bioseqDB.add(bioseq) | |
| 584 return bioseqDB | |
| 585 | |
| 586 def _removeRepeatedBlanksInAStr (self, StrToClean ): | |
| 587 resStr=StrToClean.expandtabs(2) | |
| 588 compResStr=resStr.replace (" "," ") | |
| 589 while compResStr != resStr : | |
| 590 resStr=compResStr | |
| 591 compResStr=resStr.replace (" "," ") | |
| 592 return resStr | |
| 593 | |
| 594 def _replaceBlankByUnderScoreInAStr (self, StrToClean ): | |
| 595 resStr = StrToClean.replace (" ","_") | |
| 596 return resStr | |
| 597 | |
| 598 def _prefixeWithLineNumber (self, strMsg): | |
| 599 resStr = "File: " + self._currentFileName + "\t" | |
| 600 resStr += "Line %i " % (self._iCurrentLineNumber+1 ) + strMsg | |
| 601 return resStr | |
| 602 | |
| 603 def _prefixeWithFileName (self, strMsg): | |
| 604 resStr = "File: " + self._currentFileName + "\n" + strMsg | |
| 605 return resStr | |
| 606 | |
| 607 | |
| 608 def _completeAlleleSetWithCurrentAllele(self, dAlleleFileResults, dnaBase): | |
| 609 if dAlleleFileResults.has_key(dnaBase): | |
| 610 return dAlleleFileResults | |
| 611 else: | |
| 612 iAlleleNumber = len(dAlleleFileResults) + 1 | |
| 613 dAlleleFileResults[dnaBase] = iAlleleNumber | |
| 614 return dAlleleFileResults | |
| 615 | |
| 616 def _completeIndividualListWithCurrentIndividual(self, lIndividualResults, lineName): | |
| 617 if lIndividualResults == []: | |
| 618 iIndividualNumber = 1 | |
| 619 else: | |
| 620 iIndividualNumber = len(lIndividualResults) + 1 | |
| 621 | |
| 622 #TODO: transformer la liste de dictionnaire en liste d'objets | |
| 623 if not (self._checkIfALineExistInList(lIndividualResults, lineName)): | |
| 624 dIndividual2Add = {'individualNumber': iIndividualNumber, 'individualName': lineName, 'scientificName': self._taxon} | |
| 625 lIndividualResults.append(dIndividual2Add) | |
| 626 | |
| 627 return lIndividualResults | |
| 628 | |
| 629 def _completeBatchLineListWithCurrentIndividual(self, lBatchLineResults, lIndividualResults, lineName): | |
| 630 lineDict = self._getALineDictFromADictListWithALineName(lIndividualResults, lineName) | |
| 631 | |
| 632 if len(lineDict) != 0: | |
| 633 if(lineDict.has_key('individualNumber')): | |
| 634 indivNumberOfTheLineDict = lineDict['individualNumber'] | |
| 635 else: | |
| 636 msg = "Problem with the batchLine results construction: individual named " + lineName + " has no individual number!" | |
| 637 self._logFile.error(msg) | |
| 638 raise Exception (msg) | |
| 639 else: | |
| 640 msg = "Problem with the batchLine results construction: individual named " + lineName + " not in individual list!" | |
| 641 self._logFile.error(msg) | |
| 642 raise Exception (msg) | |
| 643 | |
| 644 dResults2Add = {'IndividualNumber': str(indivNumberOfTheLineDict), 'BatchNumber' : self._currentBatchNumber} | |
| 645 lBatchLineResults.append(dResults2Add) | |
| 646 return lBatchLineResults | |
| 647 | |
| 648 def _getALineDictFromADictListWithALineName(self, lDictList, lineName): | |
| 649 dictToReturn = {} | |
| 650 for myDict in lDictList: | |
| 651 if myDict['individualName'] == lineName: | |
| 652 dictToReturn = myDict | |
| 653 | |
| 654 return dictToReturn | |
| 655 | |
| 656 def _checkIfALineExistInList(self, lDictList, lineName): | |
| 657 for myDict in lDictList: | |
| 658 if myDict['individualName'] == lineName: | |
| 659 return True | |
| 660 return False | |
| 661 | |
| 662 def _getCurrentBatchResult(self): | |
| 663 return self._lBatchFileResults[self._currentBatchNumber-1] | |
| 664 | |
| 665 | |
| 666 | |
| 667 | |
| 668 class ReferenceBioseqAndLinesBioseqDBWrapper (object): | |
| 669 | |
| 670 def __init__ (self, iReferenceBioseq, iLinesBioSeqDB, logger, fileName): | |
| 671 self._iReferenceBioseq = iReferenceBioseq | |
| 672 self._iLinesBioseqDB = iLinesBioSeqDB | |
| 673 self._logger = logger | |
| 674 self._currentFileName = fileName | |
| 675 self._checkAllSeqs() | |
| 676 | |
| 677 | |
| 678 def _checkAllSeqs(self): | |
| 679 self._iReferenceBioseq.checkEOF() | |
| 680 refSeqLen = self._iReferenceBioseq.getLength() | |
| 681 | |
| 682 for seq in self._iLinesBioseqDB.db: | |
| 683 seq.checkEOF() | |
| 684 if(not seq.getLength() == refSeqLen): | |
| 685 msg = "File: " + self._currentFileName + ", problem with the sequence " + seq.header + ": its length is different from the reference seq! All the sequences must have the same length.\n" | |
| 686 msg += "refseq length: " + str(refSeqLen) + "\n" | |
| 687 msg += "seq length: " + str(seq.getLength()) + "\n" | |
| 688 self._logger.error(msg) | |
| 689 raise Exception (msg) | |
| 690 | |
| 691 def getLinesBioseqInstances(self): | |
| 692 return self._iLinesBioseqDB.db | |
| 693 | |
| 694 def getReferenceSequence(self): | |
| 695 return self._iReferenceBioseq.sequence | |
| 696 | |
| 697 class Multifasta2SNPFileWriter(object): | |
| 698 | |
| 699 SUB_SNP_FILE_HEADER = ["SubSNPName","ConfidenceValue","Type","Position","5flank", | |
| 700 "3flank","Length","BatchNumber","IndividualNumber","PrimerType","PrimerNumber","Forward_or_Reverse","AlleleNumber"] | |
| 701 | |
| 702 ALLELE_FILE_HEADER = ["AlleleNumber","Value","Motif","NbCopy","Comment"] | |
| 703 | |
| 704 INDIVIDUAL_FILE_HEADER = ["IndividualNumber","IndividualName","Description","AberrAneuploide", | |
| 705 "FractionLength","DeletionLineSynthesis","UrlEarImage","TypeLine","ChromNumber","ArmChrom","DeletionBin","ScientificName", | |
| 706 "local_germplasm_name","submitter_code","local_institute","donor_institute","donor_acc_id"] | |
| 707 | |
| 708 SEQUENCE_CSV_FILE_HEADER = ["SequenceName","SeqType","BankName","BankVersion","ACNumber","Locus","ScientificName"] | |
| 709 | |
| 710 BATCH_TXT_FILE_HEADER = ["BatchNumber", "BatchName", "GeneName", "Description", "ContactNumber", "ProtocolNumber", "ThematicNumber", "RefSeqName", "AlignmentFileName", "SeqName"] | |
| 711 | |
| 712 BATCH_LINE_FILE_HEADER = ["IndividualNumber", "Pos5", "Pos3", "BatchNumber", "Sequence"] | |
| 713 | |
| 714 def __init__(self): | |
| 715 self._csvFieldSeparator = ";" | |
| 716 self._txtSubFieldSeparator = ": " | |
| 717 self._txtFieldSeparator = "\n" | |
| 718 self._primerType = "Sequence" | |
| 719 self._csvLineSeparator = "\n" | |
| 720 self._txtLineSeparator = "//\n" | |
| 721 | |
| 722 def write(self, iMultifasta2SNPFile): | |
| 723 self._writeSubSNPFile(iMultifasta2SNPFile._outSubSNPFileName, iMultifasta2SNPFile._lSubSNPFileResults) | |
| 724 self._writeAlleleFile(iMultifasta2SNPFile._outAlleleFileName, iMultifasta2SNPFile._dAlleleFileResults) | |
| 725 self._writeIndividualFile(iMultifasta2SNPFile._outIndividualFileName, iMultifasta2SNPFile._lIndividualFileResults) | |
| 726 self._writeSequenceFiles(iMultifasta2SNPFile._outSequenceFSAFileName, iMultifasta2SNPFile._outSequenceCSVFileName, iMultifasta2SNPFile._lRefSequences, iMultifasta2SNPFile._taxon) | |
| 727 self._writeBatchFile(iMultifasta2SNPFile._outBatchFileName, iMultifasta2SNPFile._lBatchFileResults) | |
| 728 self._writeBatchLineFile(iMultifasta2SNPFile._outBatchLineFileName, iMultifasta2SNPFile._lBatchLineFileResults) | |
| 729 | |
| 730 def sortAlleleResultByAlleleNumber(self, dAlleleFileResults): | |
| 731 return sorted(dAlleleFileResults.items(), key=lambda(k,v):(v,k)) | |
| 732 | |
| 733 def _writeSubSNPFile(self, subSNPFileName, lSNP2Write): | |
| 734 outF = open(subSNPFileName, "w") | |
| 735 self._writeSNPFileHeader(outF) | |
| 736 for dSNP in lSNP2Write: | |
| 737 self._writeSNPFileLine(outF, dSNP) | |
| 738 outF.close() | |
| 739 | |
| 740 def _writeAlleleFile(self, alleleFileName, dAllele2Write): | |
| 741 outF = open(alleleFileName, "w") | |
| 742 self._writeAlleleFileHeader(outF) | |
| 743 lAlleleSortedResults = self.sortAlleleResultByAlleleNumber(dAllele2Write) | |
| 744 for tAllele in lAlleleSortedResults: | |
| 745 self._writeAlleleFileLine(outF, tAllele[0], tAllele[1]) | |
| 746 | |
| 747 outF.close() | |
| 748 | |
| 749 def _writeIndividualFile(self, individualFileName, lIndividual2Write): | |
| 750 sorted(lIndividual2Write, key=lambda Individual: (Individual['individualNumber'])) | |
| 751 outF = open(individualFileName, "w") | |
| 752 self._writeIndividualFileHeader(outF) | |
| 753 | |
| 754 for dIndiv in lIndividual2Write: | |
| 755 self._writeIndividualFileLine(outF, dIndiv) | |
| 756 | |
| 757 outF.close() | |
| 758 | |
| 759 def _writeSequenceFiles(self, sequenceFSAFileName, sequenceCSVFileName, lRefSequences, taxon): | |
| 760 outFSA = open(sequenceFSAFileName, "w") | |
| 761 outCSV = open(sequenceCSVFileName, "w") | |
| 762 self._writeSequenceCSVHeader(outCSV) | |
| 763 | |
| 764 for refSeq in lRefSequences: | |
| 765 refSeq.cleanGap() | |
| 766 self._writeSequenceFSAFile(outFSA, refSeq) | |
| 767 self._writeSequenceCSVLine(outCSV, refSeq, taxon) | |
| 768 | |
| 769 outFSA.close() | |
| 770 outCSV.close() | |
| 771 | |
| 772 def _writeSequenceFSAFile(self, outF, refSeq): | |
| 773 outF.write( ">%s\n" % ( refSeq.header ) ) | |
| 774 outF.write( "%s\n" % ( refSeq.sequence[0:refSeq.getLength()] ) ) | |
| 775 | |
| 776 | |
| 777 def _writeBatchFile(self, batchFileName, lBatchResults): | |
| 778 outF = open(batchFileName, "w") | |
| 779 for dBatchResults in lBatchResults: | |
| 780 for head in Multifasta2SNPFileWriter.BATCH_TXT_FILE_HEADER[:]: | |
| 781 if dBatchResults.has_key(head): | |
| 782 outF.write(head + self._txtSubFieldSeparator + str(dBatchResults[head]) + self._txtFieldSeparator) | |
| 783 else: | |
| 784 outF.write(head + self._txtSubFieldSeparator + self._txtFieldSeparator) | |
| 785 | |
| 786 outF.write(self._txtLineSeparator) | |
| 787 | |
| 788 outF.close() | |
| 789 | |
| 790 def _writeBatchLineFile(self, batchLineFileName, lBatchLineResults): | |
| 791 outF = open(batchLineFileName, "w") | |
| 792 self._writeBatchLineFileHeader(outF) | |
| 793 for dResult in lBatchLineResults: | |
| 794 self._writeBatchLineFileLine(outF, dResult) | |
| 795 outF.close() | |
| 796 | |
| 797 def _writeSNPFileHeader(self, outF): | |
| 798 for head in Multifasta2SNPFileWriter.SUB_SNP_FILE_HEADER[:-1]: | |
| 799 outF.write(head + self._csvFieldSeparator) | |
| 800 outF.write(Multifasta2SNPFileWriter.SUB_SNP_FILE_HEADER[-1] + self._csvLineSeparator) | |
| 801 | |
| 802 def _writeAlleleFileHeader(self, outF): | |
| 803 for head in Multifasta2SNPFileWriter.ALLELE_FILE_HEADER[:-1]: | |
| 804 outF.write(head + self._csvFieldSeparator) | |
| 805 outF.write(Multifasta2SNPFileWriter.ALLELE_FILE_HEADER[-1] + self._csvLineSeparator) | |
| 806 | |
| 807 def _writeIndividualFileHeader(self, outF): | |
| 808 for head in Multifasta2SNPFileWriter.INDIVIDUAL_FILE_HEADER[:-1]: | |
| 809 outF.write(head + self._csvFieldSeparator) | |
| 810 outF.write(Multifasta2SNPFileWriter.INDIVIDUAL_FILE_HEADER[-1] + self._csvLineSeparator) | |
| 811 | |
| 812 def _writeSequenceCSVHeader(self, outF): | |
| 813 for head in Multifasta2SNPFileWriter.SEQUENCE_CSV_FILE_HEADER[:-1]: | |
| 814 outF.write(head + self._csvFieldSeparator) | |
| 815 outF.write(Multifasta2SNPFileWriter.SEQUENCE_CSV_FILE_HEADER[-1] + self._csvLineSeparator) | |
| 816 | |
| 817 def _writeBatchLineFileHeader(self, outF): | |
| 818 for head in Multifasta2SNPFileWriter.BATCH_LINE_FILE_HEADER[:-1]: | |
| 819 outF.write(head + self._csvFieldSeparator) | |
| 820 outF.write(Multifasta2SNPFileWriter.BATCH_LINE_FILE_HEADER[-1] + self._csvLineSeparator) | |
| 821 | |
| 822 def _writeSNPFileLine(self, outF, dSNP): | |
| 823 outF.write(dSNP['subSNPName'] + self._csvFieldSeparator) | |
| 824 outF.write(dSNP['confidenceValue'] + self._csvFieldSeparator + dSNP['type'] + self._csvFieldSeparator) | |
| 825 outF.write(str(dSNP['position']) + self._csvFieldSeparator + dSNP['5flank'] + self._csvFieldSeparator + dSNP['3flank'] + self._csvFieldSeparator) | |
| 826 outF.write(str(dSNP['length']) + self._csvFieldSeparator + str(dSNP['batchNumber']) + self._csvFieldSeparator) | |
| 827 outF.write(str(dSNP['lineName']) + self._csvFieldSeparator) | |
| 828 outF.write(self._primerType + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator + str(dSNP['allele']) + self._csvLineSeparator) | |
| 829 | |
| 830 def _writeAlleleFileLine(self, outF, sAllele2Write, iAlleleNumber): | |
| 831 outF.write(str(iAlleleNumber) + self._csvFieldSeparator) | |
| 832 outF.write(sAllele2Write + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator + self._csvLineSeparator) | |
| 833 | |
| 834 def _writeIndividualFileLine(self, outF, dIndividual): | |
| 835 outF.write(str(dIndividual['individualNumber']) + self._csvFieldSeparator) | |
| 836 outF.write(dIndividual['individualName'] + self._csvFieldSeparator + self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator+ self._csvFieldSeparator) | |
| 837 outF.write(dIndividual['scientificName'] + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator+ self._csvFieldSeparator + self._csvFieldSeparator + self._csvLineSeparator) | |
| 838 | |
| 839 def _writeSequenceCSVLine(self, outF, refSeq, taxon): | |
| 840 outF.write(refSeq.header + self._csvFieldSeparator) | |
| 841 outF.write("Reference" + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator) | |
| 842 outF.write(taxon + self._csvLineSeparator) | |
| 843 | |
| 844 def _writeBatchLineFileLine(self, outF, dResult): | |
| 845 outF.write(str(dResult['IndividualNumber']) + self._csvFieldSeparator + self._csvFieldSeparator + self._csvFieldSeparator) | |
| 846 outF.write(str(dResult['BatchNumber']) + self._csvFieldSeparator + self._csvLineSeparator) |
