diff commons/core/parsing/BlatParser.py @ 38:2c0c0a89fad7

Uploaded
author m-zytnicki
date Thu, 02 May 2013 09:56:47 -0400
parents 769e306b7933
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/BlatParser.py	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,351 @@
+# Copyright INRA (Institut National de la Recherche Agronomique)
+# http://www.inra.fr
+# http://urgi.versailles.inra.fr
+#
+# This software is governed by the CeCILL license under French law and
+# abiding by the rules of distribution of free software.  You can  use, 
+# modify and/ or redistribute the software under the terms of the CeCILL
+# license as circulated by CEA, CNRS and INRIA at the following URL
+# "http://www.cecill.info". 
+#
+# As a counterpart to the access to the source code and  rights to copy,
+# modify and redistribute granted by the license, users are provided only
+# with a limited warranty  and the software's author,  the holder of the
+# economic rights,  and the successive licensors  have only  limited
+# liability. 
+#
+# In this respect, the user's attention is drawn to the risks associated
+# with loading,  using,  modifying and/or developing or reproducing the
+# software by the user in light of its specific status of free software,
+# that may mean  that it is complicated to manipulate,  and  that  also
+# therefore means  that it is reserved for developers  and  experienced
+# professionals having in-depth computer knowledge. Users are therefore
+# encouraged to load and test the software's suitability as regards their
+# requirements in conditions enabling the security of their systems and/or 
+# data to be ensured and,  more generally, to use and operate it in the 
+# same conditions as regards security. 
+#
+# The fact that you are presently reading this means that you have had
+# knowledge of the CeCILL license and that you accept its terms.
+
+import sys
+
+## This class parses and stores one line of a Blat results output file.
+#
+class BlatParser(object):
+    """Hold the 21 columns of one line of a Blat results output file.
+
+    Column values are stored exactly as given (no type conversion is done)
+    and default to the empty string.  Problems found while filling a record
+    are reported as WARNING messages on sys.stderr, not raised.  Two records
+    compare equal when their TName, TSize, TStart and TEnd are equal.
+
+    Example:
+        parser = BlatParser()
+        parser.setAttributesFromString(line, lineNumber)
+        name = parser.getTName()
+    """
+
+    # (attribute name, label) of each column, in the order the columns
+    # appear in a blat line.  The label is the name used in the warning
+    # messages; prefixed with 'set' it is also the name of the setter.
+    _FIELDS = (
+        ('_match', 'Match'),
+        ('_mismatch', 'Mismatch'),
+        ('_repMatch', 'RepMatch'),
+        ('_N', 'N'),
+        ('_QGapCount', 'QGapCount'),
+        ('_QGapBases', 'QGapBases'),
+        ('_TGapCount', 'TGapCount'),
+        ('_TGapBases', 'TGapBases'),
+        ('_strand', 'Strand'),
+        ('_QName', 'QName'),
+        ('_QSize', 'QSize'),
+        ('_QStart', 'QStart'),
+        ('_QEnd', 'QEnd'),
+        ('_TName', 'TName'),
+        ('_TSize', 'TSize'),
+        ('_TStart', 'TStart'),
+        ('_TEnd', 'TEnd'),
+        ('_blockCount', 'BlockCount'),
+        ('_blockSizes', 'BlockSizes'),
+        ('_qStarts', 'QStarts'),
+        ('_tStarts', 'TStarts'),
+    )
+
+    def __init__(self, match='', mismatch='', repMatch='', N='', QGapCount='', QGapBases='', TGapCount='', TGapBases='', strand='', QName='', QSize='', QStart='', QEnd='', TName='', TSize='', TStart='', TEnd='', blockCount='', blockSizes='', qStarts='', tStarts=''):
+        """Create a record; every column defaults to the empty string."""
+        self._match = match
+        self._mismatch = mismatch
+        self._repMatch = repMatch
+        self._N = N
+        self._QGapCount = QGapCount
+        self._QGapBases = QGapBases
+        self._TGapCount = TGapCount
+        self._TGapBases = TGapBases
+        self._strand = strand
+        self._QName = QName
+        self._QSize = QSize
+        self._QStart = QStart
+        self._QEnd = QEnd
+        self._TName = TName
+        self._TSize = TSize
+        self._TStart = TStart
+        self._TEnd = TEnd
+        self._blockCount = blockCount
+        self._blockSizes = blockSizes
+        self._qStarts = qStarts
+        self._tStarts = tStarts
+
+    def __eq__(self, o):
+        """Return True when TName, TSize, TStart and TEnd are all equal.
+
+        NotImplemented is returned when o lacks one of these attributes, so
+        that comparing with an unrelated object gives False instead of
+        raising AttributeError.
+        """
+        try:
+            return (self._TName == o._TName
+                    and self._TSize == o._TSize
+                    and self._TStart == o._TStart
+                    and self._TEnd == o._TEnd)
+        except AttributeError:
+            return NotImplemented
+
+    def __ne__(self, o):
+        """Return the negation of __eq__.
+
+        Python 2 does not derive != from __eq__, so without this method two
+        equal records would still be reported as different by !=.
+        """
+        isEqual = self.__eq__(o)
+        if isEqual is NotImplemented:
+            return isEqual
+        return not isEqual
+
+    # Setters: one per column; the value is stored without any validation.
+
+    def setMatch(self, match):
+        """Store the value of the Match column."""
+        self._match = match
+
+    def setMismatch(self, mismatch):
+        """Store the value of the Mismatch column."""
+        self._mismatch = mismatch
+
+    def setRepMatch(self, repMatch):
+        """Store the value of the RepMatch column."""
+        self._repMatch = repMatch
+
+    def setN(self, N):
+        """Store the value of the N column."""
+        self._N = N
+
+    def setQGapCount(self, QGapCount):
+        """Store the value of the QGapCount column."""
+        self._QGapCount = QGapCount
+
+    def setQGapBases(self, QGapBases):
+        """Store the value of the QGapBases column."""
+        self._QGapBases = QGapBases
+
+    def setTGapCount(self, TGapCount):
+        """Store the value of the TGapCount column."""
+        self._TGapCount = TGapCount
+
+    def setTGapBases(self, TGapBases):
+        """Store the value of the TGapBases column."""
+        self._TGapBases = TGapBases
+
+    def setStrand(self, strand):
+        """Store the value of the Strand column."""
+        self._strand = strand
+
+    def setQName(self, QName):
+        """Store the value of the QName column."""
+        self._QName = QName
+
+    def setQSize(self, QSize):
+        """Store the value of the QSize column."""
+        self._QSize = QSize
+
+    def setQStart(self, QStart):
+        """Store the value of the QStart column."""
+        self._QStart = QStart
+
+    def setQEnd(self, QEnd):
+        """Store the value of the QEnd column."""
+        self._QEnd = QEnd
+
+    def setTName(self, TName):
+        """Store the value of the TName column."""
+        self._TName = TName
+
+    def setTSize(self, TSize):
+        """Store the value of the TSize column."""
+        self._TSize = TSize
+
+    def setTStart(self, TStart):
+        """Store the value of the TStart column."""
+        self._TStart = TStart
+
+    def setTEnd(self, TEnd):
+        """Store the value of the TEnd column."""
+        self._TEnd = TEnd
+
+    def setBlockCount(self, blockCount):
+        """Store the value of the BlockCount column."""
+        self._blockCount = blockCount
+
+    def setBlockSizes(self, blockSizes):
+        """Store the value of the BlockSizes column."""
+        self._blockSizes = blockSizes
+
+    def setQStarts(self, qStarts):
+        """Store the value of the QStarts column."""
+        self._qStarts = qStarts
+
+    def setTStarts(self, tStarts):
+        """Store the value of the TStarts column."""
+        self._tStarts = tStarts
+
+    # Getters: one per column; the stored value is returned unchanged.
+
+    def getMatch(self):
+        """Return the value of the Match column."""
+        return self._match
+
+    def getMismatch(self):
+        """Return the value of the Mismatch column."""
+        return self._mismatch
+
+    def getRepMatch(self):
+        """Return the value of the RepMatch column."""
+        return self._repMatch
+
+    def getN(self):
+        """Return the value of the N column."""
+        return self._N
+
+    def getQGapCount(self):
+        """Return the value of the QGapCount column."""
+        return self._QGapCount
+
+    def getQGapBases(self):
+        """Return the value of the QGapBases column."""
+        return self._QGapBases
+
+    def getTGapCount(self):
+        """Return the value of the TGapCount column."""
+        return self._TGapCount
+
+    def getTGapBases(self):
+        """Return the value of the TGapBases column."""
+        return self._TGapBases
+
+    def getStrand(self):
+        """Return the value of the Strand column."""
+        return self._strand
+
+    def getQName(self):
+        """Return the value of the QName column."""
+        return self._QName
+
+    def getQSize(self):
+        """Return the value of the QSize column."""
+        return self._QSize
+
+    def getQStart(self):
+        """Return the value of the QStart column."""
+        return self._QStart
+
+    def getQEnd(self):
+        """Return the value of the QEnd column."""
+        return self._QEnd
+
+    def getTName(self):
+        """Return the value of the TName column."""
+        return self._TName
+
+    def getTSize(self):
+        """Return the value of the TSize column."""
+        return self._TSize
+
+    def getTStart(self):
+        """Return the value of the TStart column."""
+        return self._TStart
+
+    def getTEnd(self):
+        """Return the value of the TEnd column."""
+        return self._TEnd
+
+    def getBlockCount(self):
+        """Return the value of the BlockCount column."""
+        return self._blockCount
+
+    def getBlockSizes(self):
+        """Return the value of the BlockSizes column."""
+        return self._blockSizes
+
+    def getQStarts(self):
+        """Return the value of the QStarts column."""
+        return self._qStarts
+
+    def getTStarts(self):
+        """Return the value of the TStarts column."""
+        return self._tStarts
+
+    def setAttributes(self, lResults, iCurrentLineNumber):
+        """Fill the record from a list of column values.
+
+        lResults           -- sequence of at least 21 values in blat column
+                              order; values after the 21st are ignored
+        iCurrentLineNumber -- line number quoted in the warning messages
+
+        A warning is written to sys.stderr for every empty column.  If any
+        column is empty, or if lResults holds fewer than 21 values, every
+        column of the record is reset to the empty string.
+        """
+        if len(lResults) < len(self._FIELDS):
+            # A missing column used to raise IndexError here; report the
+            # short list like any other invalid line instead.
+            sys.stderr.write("WARNING: The line %s is not valid blat line (%s columns -> 21 columns needed)\n" % (iCurrentLineNumber, len(lResults)))
+            self._setAllToNull()
+            return
+
+        error = False
+        for (_, label), value in zip(self._FIELDS, lResults):
+            if value != '':
+                # Go through the public setter so that an override in a
+                # subclass is still honoured.
+                getattr(self, 'set' + label)(value)
+            else:
+                sys.stderr.write("WARNING: The field %s is empty in blat file in line %s\n" % (label, iCurrentLineNumber))
+                error = True
+
+        if error:
+            self._setAllToNull()
+
+    def setAttributesFromString(self, blatLine, iCurrentLineNumber="", fieldSeparator="\t"):
+        """Fill the record from one raw line of a blat file.
+
+        blatLine           -- the line; trailing whitespace is stripped
+        iCurrentLineNumber -- line number quoted in the warning messages
+        fieldSeparator     -- string that separates the columns
+
+        A line that does not split into exactly 21 columns is reported on
+        sys.stderr and the record is left unchanged.
+        """
+        # NOTE: rstrip() also removes trailing separators that are
+        # whitespace (e.g. tabs), so a line whose last columns are empty is
+        # reported as having too few columns.
+        lBlatLineItem = blatLine.rstrip().split(fieldSeparator)
+        if len(lBlatLineItem) != len(self._FIELDS):
+            sys.stderr.write("WARNING: The line %s is not valid blat line (%s columns -> 21 columns needed)\n" % (iCurrentLineNumber, len(lBlatLineItem)))
+            return
+        self.setAttributes(lBlatLineItem, iCurrentLineNumber)
+
+    def _setAllToNull(self):
+        """Reset every column of the record to the empty string."""
+        for attribute, _ in self._FIELDS:
+            setattr(self, attribute, '')
\ No newline at end of file