Mercurial > repos > yufei-luo > s_mart
view commons/core/parsing/BlatParser.py @ 47:b6481845eb0d
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 30 Sep 2013 05:51:28 -0400 |
parents | 769e306b7933 |
children |
line wrap: on
line source
# Copyright INRA (Institut National de la Recherche Agronomique) # http://www.inra.fr # http://urgi.versailles.inra.fr # # This software is governed by the CeCILL license under French law and # abiding by the rules of distribution of free software. You can use, # modify and/ or redistribute the software under the terms of the CeCILL # license as circulated by CEA, CNRS and INRIA at the following URL # "http://www.cecill.info". # # As a counterpart to the access to the source code and rights to copy, # modify and redistribute granted by the license, users are provided only # with a limited warranty and the software's author, the holder of the # economic rights, and the successive licensors have only limited # liability. # # In this respect, the user's attention is drawn to the risks associated # with loading, using, modifying and/or developing or reproducing the # software by the user in light of its specific status of free software, # that may mean that it is complicated to manipulate, and that also # therefore means that it is reserved for developers and experienced # professionals having in-depth computer knowledge. Users are therefore # encouraged to load and test the software's suitability as regards their # requirements in conditions enabling the security of their systems and/or # data to be ensured and, more generally, to use and operate it in the # same conditions as regards security. # # The fact that you are presently reading this means that you have had # knowledge of the CeCILL license and that you accept its terms. 
import sys


## this class can parse a Blat results output file
#
class BlatParser(object):
    """Container for the 21 columns of one line of a Blat results file.

    Every column is stored exactly as it was given (plain strings when the
    object is filled through setAttributesFromString; no numeric conversion
    is performed).  An "unset" column is represented by the empty string.

    Two instances compare equal when their TName, TSize, TStart and TEnd
    values are equal; the other columns are ignored by the comparison.
    """

    # One entry per column, in file order: (label, instance attribute).
    # The label is used verbatim in warning messages, and "set" + label is
    # the name of the matching setter method.
    _FIELDS = (
        ("Match", "_match"),
        ("Mismatch", "_mismatch"),
        ("RepMatch", "_repMatch"),
        ("N", "_N"),
        ("QGapCount", "_QGapCount"),
        ("QGapBases", "_QGapBases"),
        ("TGapCount", "_TGapCount"),
        ("TGapBases", "_TGapBases"),
        ("Strand", "_strand"),
        ("QName", "_QName"),
        ("QSize", "_QSize"),
        ("QStart", "_QStart"),
        ("QEnd", "_QEnd"),
        ("TName", "_TName"),
        ("TSize", "_TSize"),
        ("TStart", "_TStart"),
        ("TEnd", "_TEnd"),
        ("BlockCount", "_blockCount"),
        ("BlockSizes", "_blockSizes"),
        ("QStarts", "_qStarts"),
        ("TStarts", "_tStarts"),
    )

    def __init__(self, match='', mismatch='', repMatch='', N='', QGapCount='', QGapBases='', TGapCount='', TGapBases='', strand='', QName='', QSize='', QStart='', QEnd='', TName='', TSize='', TStart='', TEnd='', blockCount='', blockSizes='', qStarts='', tStarts=''):
        """Create a record; every column defaults to the empty string."""
        self._match = match
        self._mismatch = mismatch
        self._repMatch = repMatch
        self._N = N
        self._QGapCount = QGapCount
        self._QGapBases = QGapBases
        self._TGapCount = TGapCount
        self._TGapBases = TGapBases
        self._strand = strand
        self._QName = QName
        self._QSize = QSize
        self._QStart = QStart
        self._QEnd = QEnd
        self._TName = TName
        self._TSize = TSize
        self._TStart = TStart
        self._TEnd = TEnd
        self._blockCount = blockCount
        self._blockSizes = blockSizes
        self._qStarts = qStarts
        self._tStarts = tStarts

    def __eq__(self, o):
        """Return True when both records have the same TName, TSize, TStart and TEnd.

        Objects that do not carry these attributes (None, strings, ...) are
        not comparable: NotImplemented is returned so that Python evaluates
        the comparison as False instead of raising AttributeError.
        """
        try:
            return (self._TName == o._TName
                    and self._TSize == o._TSize
                    and self._TStart == o._TStart
                    and self._TEnd == o._TEnd)
        except AttributeError:
            return NotImplemented

    def __ne__(self, o):
        """Return the negation of __eq__.

        Defined explicitly because Python 2 does not derive != from ==.
        """
        result = self.__eq__(o)
        if result is NotImplemented:
            return result
        return not result

    # ------------------------------------------------------------------
    # setters
    # ------------------------------------------------------------------

    def setMatch(self, match):
        self._match = match

    def setMismatch(self, mismatch):
        self._mismatch = mismatch

    def setRepMatch(self, repMatch):
        self._repMatch = repMatch

    def setN(self, N):
        self._N = N

    def setQGapCount(self, QGapCount):
        self._QGapCount = QGapCount

    def setQGapBases(self, QGapBases):
        self._QGapBases = QGapBases

    def setTGapCount(self, TGapCount):
        self._TGapCount = TGapCount

    def setTGapBases(self, TGapBases):
        self._TGapBases = TGapBases

    def setStrand(self, strand):
        self._strand = strand

    def setQName(self, QName):
        self._QName = QName

    def setQSize(self, QSize):
        self._QSize = QSize

    def setQStart(self, QStart):
        self._QStart = QStart

    def setQEnd(self, QEnd):
        self._QEnd = QEnd

    def setTName(self, TName):
        self._TName = TName

    def setTSize(self, TSize):
        self._TSize = TSize

    def setTStart(self, TStart):
        self._TStart = TStart

    def setTEnd(self, TEnd):
        self._TEnd = TEnd

    def setBlockCount(self, blockCount):
        self._blockCount = blockCount

    def setBlockSizes(self, blockSizes):
        self._blockSizes = blockSizes

    def setQStarts(self, qStarts):
        self._qStarts = qStarts

    def setTStarts(self, tStarts):
        self._tStarts = tStarts

    # ------------------------------------------------------------------
    # getters
    # ------------------------------------------------------------------

    def getMatch(self):
        return self._match

    def getMismatch(self):
        return self._mismatch

    def getRepMatch(self):
        return self._repMatch

    def getN(self):
        return self._N

    def getQGapCount(self):
        return self._QGapCount

    def getQGapBases(self):
        return self._QGapBases

    def getTGapCount(self):
        return self._TGapCount

    def getTGapBases(self):
        return self._TGapBases

    def getStrand(self):
        return self._strand

    def getQName(self):
        return self._QName

    def getQSize(self):
        return self._QSize

    def getQStart(self):
        return self._QStart

    def getQEnd(self):
        return self._QEnd

    def getTName(self):
        return self._TName

    def getTSize(self):
        return self._TSize

    def getTStart(self):
        return self._TStart

    def getTEnd(self):
        return self._TEnd

    def getBlockCount(self):
        return self._blockCount

    def getBlockSizes(self):
        return self._blockSizes

    def getQStarts(self):
        return self._qStarts

    def getTStarts(self):
        return self._tStarts

    # ------------------------------------------------------------------
    # filling a record
    # ------------------------------------------------------------------

    def setAttributes(self, lResults, iCurrentLineNumber):
        """Fill the record from a sequence holding the 21 column values.

        @param lResults: indexable sequence; items 0..20 are read in column
            order (extra items are ignored, a shorter sequence raises
            IndexError when the first missing item is reached)
        @param iCurrentLineNumber: line number, only used in warning messages

        Every column equal to '' produces one warning on stderr.  If at
        least one column was empty, the whole record is reset to empty
        strings once all columns have been examined.
        """
        error = False
        for index, (label, _attribute) in enumerate(self._FIELDS):
            value = lResults[index]
            if value != '':
                # Go through the public setter so that overridden setters
                # keep being honoured.
                getattr(self, "set" + label)(value)
            else:
                sys.stderr.write("WARNING: The field %s is empty in blat file in line %s\n" % (label, iCurrentLineNumber))
                error = True

        if error:
            self._setAllToNull()

    def setAttributesFromString(self, blatLine, iCurrentLineNumber="", fieldSeparator="\t"):
        """Fill the record from one raw text line of a Blat results file.

        @param blatLine: the line to parse
        @param iCurrentLineNumber: line number, only used in warning messages
        @param fieldSeparator: column separator (tabulation by default)

        The line must split into exactly 21 columns; otherwise a warning is
        written on stderr and the record is left untouched.
        """
        # NOTE: rstrip() removes every trailing whitespace character, which
        # includes trailing tabulations: a line whose last column(s) are
        # empty is therefore reported as having too few columns.
        blatLine = blatLine.rstrip()
        lBlatLineItem = blatLine.split(fieldSeparator)
        if len(lBlatLineItem) != len(self._FIELDS):
            sys.stderr.write("WARNING: The line %s is not valid blat line (%s columns -> 21 columns needed)\n" % (iCurrentLineNumber, len(lBlatLineItem)))
        else:
            self.setAttributes(lBlatLineItem, iCurrentLineNumber)

    def _setAllToNull(self):
        """Reset every column to the empty string (bypasses the setters)."""
        for _label, attribute in self._FIELDS:
            setattr(self, attribute, '')