view commons/core/parsing/BlatParser.py @ 53:47310c4fb725

Uploaded
author m-zytnicki
date Fri, 10 Jan 2014 08:57:02 -0500
parents 769e306b7933
children
line wrap: on
line source

# Copyright INRA (Institut National de la Recherche Agronomique)
# http://www.inra.fr
# http://urgi.versailles.inra.fr
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software.  You can  use, 
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info". 
#
# As a counterpart to the access to the source code and  rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty  and the software's author,  the holder of the
# economic rights,  and the successive licensors  have only  limited
# liability. 
#
# In this respect, the user's attention is drawn to the risks associated
# with loading,  using,  modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean  that it is complicated to manipulate,  and  that  also
# therefore means  that it is reserved for developers  and  experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or 
# data to be ensured and,  more generally, to use and operate it in the 
# same conditions as regards security. 
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.

import sys

## This class parses one line of a Blat results output file and stores its fields
#
class BlatParser(object):
    """Container for the 21 columns of one line of a Blat results file.

    Each column is stored as given (when filled from a text line, every
    value is the raw string found in the file; no numeric conversion is
    done).  An unset field holds the empty string.

    A record can be filled through the constructor, through the individual
    setters, from a list of 21 values (setAttributes) or directly from a
    text line (setAttributesFromString).
    """

    # (name used in warnings and in the setX/getX accessors, attribute name),
    # listed in the column order of a Blat result line.
    _FIELDS = (
        ('Match', '_match'),
        ('Mismatch', '_mismatch'),
        ('RepMatch', '_repMatch'),
        ('N', '_N'),
        ('QGapCount', '_QGapCount'),
        ('QGapBases', '_QGapBases'),
        ('TGapCount', '_TGapCount'),
        ('TGapBases', '_TGapBases'),
        ('Strand', '_strand'),
        ('QName', '_QName'),
        ('QSize', '_QSize'),
        ('QStart', '_QStart'),
        ('QEnd', '_QEnd'),
        ('TName', '_TName'),
        ('TSize', '_TSize'),
        ('TStart', '_TStart'),
        ('TEnd', '_TEnd'),
        ('BlockCount', '_blockCount'),
        ('BlockSizes', '_blockSizes'),
        ('QStarts', '_qStarts'),
        ('TStarts', '_tStarts'),
    )

    def __init__(self, match='', mismatch='', repMatch='', N='', QGapCount='', QGapBases='', TGapCount='', TGapBases='', strand='', QName='', QSize='', QStart='', QEnd='', TName='', TSize='', TStart='', TEnd='', blockCount='', blockSizes='', qStarts='', tStarts=''):
        """Build a record; every field defaults to the empty string."""
        self._match = match
        self._mismatch = mismatch
        self._repMatch = repMatch
        self._N = N
        self._QGapCount = QGapCount
        self._QGapBases = QGapBases
        self._TGapCount = TGapCount
        self._TGapBases = TGapBases
        self._strand = strand
        self._QName = QName
        self._QSize = QSize
        self._QStart = QStart
        self._QEnd = QEnd
        self._TName = TName
        self._TSize = TSize
        self._TStart = TStart
        self._TEnd = TEnd
        self._blockCount = blockCount
        self._blockSizes = blockSizes
        self._qStarts = qStarts
        self._tStarts = tStarts

    def __eq__(self, o):
        """Compare two records on their target fields only.

        Two records are equal when TName, TSize, TStart and TEnd are all
        equal; every other field is ignored.  Returns NotImplemented when
        *o* does not expose these attributes, so that comparing with an
        unrelated object yields False instead of raising AttributeError.
        """
        try:
            return self._TName == o._TName and self._TSize == o._TSize and self._TStart == o._TStart and self._TEnd == o._TEnd
        except AttributeError:
            return NotImplemented

    def __ne__(self, o):
        """Negation of __eq__.

        Defined explicitly because Python 2 does not derive != from
        __eq__; without it, != would compare object identity.
        """
        result = self.__eq__(o)
        if result is NotImplemented:
            return result
        return not result

    # ---- setters -----------------------------------------------------------

    def setMatch(self, match):
        self._match = match

    def setMismatch(self, mismatch):
        self._mismatch = mismatch

    def setRepMatch(self, repMatch):
        self._repMatch = repMatch

    def setN(self, N):
        self._N = N

    def setQGapCount(self, QGapCount):
        self._QGapCount = QGapCount

    def setQGapBases(self, QGapBases):
        self._QGapBases = QGapBases

    def setTGapCount(self, TGapCount):
        self._TGapCount = TGapCount

    def setTGapBases(self, TGapBases):
        self._TGapBases = TGapBases

    def setStrand(self, strand):
        self._strand = strand

    def setQName(self, QName):
        self._QName = QName

    def setQSize(self, QSize):
        self._QSize = QSize

    def setQStart(self, QStart):
        self._QStart = QStart

    def setQEnd(self, QEnd):
        self._QEnd = QEnd

    def setTName(self, TName):
        self._TName = TName

    def setTSize(self, TSize):
        self._TSize = TSize

    def setTStart(self, TStart):
        self._TStart = TStart

    def setTEnd(self, TEnd):
        self._TEnd = TEnd

    def setBlockCount(self, blockCount):
        self._blockCount = blockCount

    def setBlockSizes(self, blockSizes):
        self._blockSizes = blockSizes

    def setQStarts(self, qStarts):
        self._qStarts = qStarts

    def setTStarts(self, tStarts):
        self._tStarts = tStarts

    # ---- getters -----------------------------------------------------------

    def getMatch(self):
        return self._match

    def getMismatch(self):
        return self._mismatch

    def getRepMatch(self):
        return self._repMatch

    def getN(self):
        return self._N

    def getQGapCount(self):
        return self._QGapCount

    def getQGapBases(self):
        return self._QGapBases

    def getTGapCount(self):
        return self._TGapCount

    def getTGapBases(self):
        return self._TGapBases

    def getStrand(self):
        return self._strand

    def getQName(self):
        return self._QName

    def getQSize(self):
        return self._QSize

    def getQStart(self):
        return self._QStart

    def getQEnd(self):
        return self._QEnd

    def getTName(self):
        return self._TName

    def getTSize(self):
        return self._TSize

    def getTStart(self):
        return self._TStart

    def getTEnd(self):
        return self._TEnd

    def getBlockCount(self):
        return self._blockCount

    def getBlockSizes(self):
        return self._blockSizes

    def getQStarts(self):
        return self._qStarts

    def getTStarts(self):
        return self._tStarts

    # ---- bulk loading ------------------------------------------------------

    def setAttributes(self, lResults, iCurrentLineNumber):
        """Fill every field from a list of 21 values.

        For each empty value a warning naming the field and the line number
        is written to stderr.  If at least one value is empty, the whole
        record is reset to empty strings once all fields have been examined.

        @param lResults: the 21 column values, in file order
        @param iCurrentLineNumber: line number, only used in warnings
        @raise IndexError: if lResults holds fewer than 21 items
        """
        error = False

        for index, (fieldName, _attributeName) in enumerate(self._FIELDS):
            value = lResults[index]
            if value != '':
                # Go through the public setter so that a subclass overriding
                # it is still honoured.
                getattr(self, 'set' + fieldName)(value)
            else:
                sys.stderr.write("WARNING: The field %s is empty in blat file in line %s\n" % (fieldName, iCurrentLineNumber))
                error = True

        if error:
            self._setAllToNull()

    def setAttributesFromString(self, blatLine, iCurrentLineNumber ="", fieldSeparator ="\t"):
        """Fill every field from one text line of a Blat results file.

        Trailing whitespace is stripped, then the line is split on
        fieldSeparator.  If the result does not have exactly 21 columns, a
        warning is written to stderr and the record is left untouched;
        otherwise the columns are handed to setAttributes.

        @param blatLine: the raw line
        @param iCurrentLineNumber: line number, only used in warnings
        @param fieldSeparator: column separator (tabulation by default)
        """
        lBlatLineItem = blatLine.rstrip().split(fieldSeparator)
        if len(lBlatLineItem) != len(self._FIELDS):
            sys.stderr.write("WARNING: The line %s is not valid blat line (%s columns -> 21 columns needed)\n" % (iCurrentLineNumber, len(lBlatLineItem)))
        else:
            self.setAttributes(lBlatLineItem, iCurrentLineNumber)

    def _setAllToNull(self):
        """Reset every field to the empty string."""
        for _fieldName, attributeName in self._FIELDS:
            setattr(self, attributeName, '')