Mercurial > repos > yufei-luo > s_mart
diff commons/core/parsing/BlatParser.py @ 38:2c0c0a89fad7
Uploaded
author | m-zytnicki |
---|---|
date | Thu, 02 May 2013 09:56:47 -0400 |
parents | 769e306b7933 |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/commons/core/parsing/BlatParser.py Thu May 02 09:56:47 2013 -0400 @@ -0,0 +1,351 @@ +# Copyright INRA (Institut National de la Recherche Agronomique) +# http://www.inra.fr +# http://urgi.versailles.inra.fr +# +# This software is governed by the CeCILL license under French law and +# abiding by the rules of distribution of free software. You can use, +# modify and/ or redistribute the software under the terms of the CeCILL +# license as circulated by CEA, CNRS and INRIA at the following URL +# "http://www.cecill.info". +# +# As a counterpart to the access to the source code and rights to copy, +# modify and redistribute granted by the license, users are provided only +# with a limited warranty and the software's author, the holder of the +# economic rights, and the successive licensors have only limited +# liability. +# +# In this respect, the user's attention is drawn to the risks associated +# with loading, using, modifying and/or developing or reproducing the +# software by the user in light of its specific status of free software, +# that may mean that it is complicated to manipulate, and that also +# therefore means that it is reserved for developers and experienced +# professionals having in-depth computer knowledge. Users are therefore +# encouraged to load and test the software's suitability as regards their +# requirements in conditions enabling the security of their systems and/or +# data to be ensured and, more generally, to use and operate it in the +# same conditions as regards security. +# +# The fact that you are presently reading this means that you have had +# knowledge of the CeCILL license and that you accept its terms. 
import sys


## this class can parse a Blat results output file
#
class BlatParser(object):
    """Hold the 21 fields of one line of a Blat results file.

    The fields are, in column order: Match, Mismatch, RepMatch, N,
    QGapCount, QGapBases, TGapCount, TGapBases, Strand, QName, QSize,
    QStart, QEnd, TName, TSize, TStart, TEnd, BlockCount, BlockSizes,
    QStarts and TStarts.

    Values are stored exactly as they are given; when a line is parsed
    with setAttributesFromString() every field is kept as a string and no
    numeric conversion is performed.  An unset field is the empty string.
    """

    # Number of columns a valid blat line must have.
    _NB_FIELDS = 21

    # (label, attribute name) of every column, in file order.  The label
    # is the name used in warning messages, and "set" + label is the name
    # of the matching setter.
    _FIELDS = (
        ('Match', '_match'),
        ('Mismatch', '_mismatch'),
        ('RepMatch', '_repMatch'),
        ('N', '_N'),
        ('QGapCount', '_QGapCount'),
        ('QGapBases', '_QGapBases'),
        ('TGapCount', '_TGapCount'),
        ('TGapBases', '_TGapBases'),
        ('Strand', '_strand'),
        ('QName', '_QName'),
        ('QSize', '_QSize'),
        ('QStart', '_QStart'),
        ('QEnd', '_QEnd'),
        ('TName', '_TName'),
        ('TSize', '_TSize'),
        ('TStart', '_TStart'),
        ('TEnd', '_TEnd'),
        ('BlockCount', '_blockCount'),
        ('BlockSizes', '_blockSizes'),
        ('QStarts', '_qStarts'),
        ('TStarts', '_tStarts'),
    )

    def __init__(self, match='', mismatch='', repMatch='', N='', QGapCount='', QGapBases='', TGapCount='', TGapBases='', strand='', QName='', QSize='', QStart='', QEnd='', TName='', TSize='', TStart='', TEnd='', blockCount='', blockSizes='', qStarts='', tStarts=''):
        """Create a record; every field defaults to the empty string."""
        self._match = match
        self._mismatch = mismatch
        self._repMatch = repMatch
        self._N = N
        self._QGapCount = QGapCount
        self._QGapBases = QGapBases
        self._TGapCount = TGapCount
        self._TGapBases = TGapBases
        self._strand = strand
        self._QName = QName
        self._QSize = QSize
        self._QStart = QStart
        self._QEnd = QEnd
        self._TName = TName
        self._TSize = TSize
        self._TStart = TStart
        self._TEnd = TEnd
        self._blockCount = blockCount
        self._blockSizes = blockSizes
        self._qStarts = qStarts
        self._tStarts = tStarts

    def __eq__(self, o):
        """Two records are equal when TName, TSize, TStart and TEnd match.

        Only these four target fields take part in the comparison.
        Comparing with an object that is not a BlatParser returns
        NotImplemented, so that e.g. ``parser == None`` evaluates to False
        instead of raising AttributeError.
        """
        if not isinstance(o, BlatParser):
            return NotImplemented
        return (self._TName == o._TName
                and self._TSize == o._TSize
                and self._TStart == o._TStart
                and self._TEnd == o._TEnd)

    def __ne__(self, o):
        """Negation of __eq__ (Python 2 does not derive it automatically)."""
        isEqual = self.__eq__(o)
        if isEqual is NotImplemented:
            return isEqual
        return not isEqual

    # ------------------------------------------------------------------
    # Setters: each one stores the given value, unchanged, in its field.
    # ------------------------------------------------------------------

    def setMatch(self, match):
        self._match = match

    def setMismatch(self, mismatch):
        self._mismatch = mismatch

    def setRepMatch(self, repMatch):
        self._repMatch = repMatch

    def setN(self, N):
        self._N = N

    def setQGapCount(self, QGapCount):
        self._QGapCount = QGapCount

    def setQGapBases(self, QGapBases):
        self._QGapBases = QGapBases

    def setTGapCount(self, TGapCount):
        self._TGapCount = TGapCount

    def setTGapBases(self, TGapBases):
        self._TGapBases = TGapBases

    def setStrand(self, strand):
        self._strand = strand

    def setQName(self, QName):
        self._QName = QName

    def setQSize(self, QSize):
        self._QSize = QSize

    def setQStart(self, QStart):
        self._QStart = QStart

    def setQEnd(self, QEnd):
        self._QEnd = QEnd

    def setTName(self, TName):
        self._TName = TName

    def setTSize(self, TSize):
        self._TSize = TSize

    def setTStart(self, TStart):
        self._TStart = TStart

    def setTEnd(self, TEnd):
        self._TEnd = TEnd

    def setBlockCount(self, blockCount):
        self._blockCount = blockCount

    def setBlockSizes(self, blockSizes):
        self._blockSizes = blockSizes

    def setQStarts(self, qStarts):
        self._qStarts = qStarts

    def setTStarts(self, tStarts):
        self._tStarts = tStarts

    # ------------------------------------------------------------------
    # Getters: each one returns the stored value of its field.
    # ------------------------------------------------------------------

    def getMatch(self):
        return self._match

    def getMismatch(self):
        return self._mismatch

    def getRepMatch(self):
        return self._repMatch

    def getN(self):
        return self._N

    def getQGapCount(self):
        return self._QGapCount

    def getQGapBases(self):
        return self._QGapBases

    def getTGapCount(self):
        return self._TGapCount

    def getTGapBases(self):
        return self._TGapBases

    def getStrand(self):
        return self._strand

    def getQName(self):
        return self._QName

    def getQSize(self):
        return self._QSize

    def getQStart(self):
        return self._QStart

    def getQEnd(self):
        return self._QEnd

    def getTName(self):
        return self._TName

    def getTSize(self):
        return self._TSize

    def getTStart(self):
        return self._TStart

    def getTEnd(self):
        return self._TEnd

    def getBlockCount(self):
        return self._blockCount

    def getBlockSizes(self):
        return self._blockSizes

    def getQStarts(self):
        return self._qStarts

    def getTStarts(self):
        return self._tStarts

    def setAttributes(self, lResults, iCurrentLineNumber):
        """Fill every field from a list of column values.

        @param lResults: sequence holding the 21 column values, in file
               order; items beyond the 21st are ignored
        @param iCurrentLineNumber: line number, only used in warnings

        A warning is written to stderr for each empty ('') column.  If at
        least one column is empty, or if fewer than 21 values are given,
        the whole record is reset to empty strings, so a record is never
        left half filled.
        """
        if len(lResults) < self._NB_FIELDS:
            # Too few values: report it the same way as
            # setAttributesFromString() instead of failing with IndexError.
            sys.stderr.write("WARNING: The line %s is not valid blat line (%s columns -> 21 columns needed)\n" % (iCurrentLineNumber, len(lResults)))
            self._setAllToNull()
            return

        error = False
        # zip() stops after the 21 known columns, ignoring any extra item.
        for (label, _attribute), value in zip(self._FIELDS, lResults):
            if value != '':
                # Go through the public setter so that overridden setters
                # in subclasses are still honoured.
                getattr(self, 'set' + label)(value)
            else:
                sys.stderr.write("WARNING: The field %s is empty in blat file in line %s\n" % (label, iCurrentLineNumber))
                error = True

        if error:
            self._setAllToNull()

    def setAttributesFromString(self, blatLine, iCurrentLineNumber ="", fieldSeparator ="\t"):
        """Fill every field from one line of a blat file.

        @param blatLine: the line to parse; trailing whitespace is removed
               before it is split
        @param iCurrentLineNumber: line number, only used in warnings
        @param fieldSeparator: column separator (tabulation by default)

        If the line does not have exactly 21 columns a warning is written
        to stderr and the record is left unchanged.
        """
        lBlatLineItem = blatLine.rstrip().split(fieldSeparator)
        if len(lBlatLineItem) != self._NB_FIELDS:
            sys.stderr.write("WARNING: The line %s is not valid blat line (%s columns -> 21 columns needed)\n" % (iCurrentLineNumber, len(lBlatLineItem)))
        else:
            self.setAttributes(lBlatLineItem, iCurrentLineNumber)

    def _setAllToNull(self):
        """Reset every field to the empty string."""
        for _label, attribute in self._FIELDS:
            setattr(self, attribute, '')