diff smart_toolShed/commons/core/parsing/BlatFileParser.py @ 0:e0f8dcca02ed

Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/smart_toolShed/commons/core/parsing/BlatFileParser.py	Thu Jan 17 10:52:14 2013 -0500
@@ -0,0 +1,63 @@
+from commons.core.parsing.BlatParser import BlatParser
+import os
+
+class BlatFileParser(object):
+
+    def __init__(self, blatFileName = None):
+        self._blatFileName = blatFileName
+        self._lBlatHits = []
+        self._dBlatHitsByQueries = {}
+        self._dQueries = {}
+        
+    def getDictOfQueries(self):
+        return self._dQueries
+    
+    def getResultLinesOfOneQuery(self, queryName):
+        return self._dBlatHitsByQueries[queryName]
+    
+    def getDictOfBlatHitsByQueries(self):
+        return self._dBlatHitsByQueries
+    
+    def getListsOfHits(self):
+        return self._lBlatHits
+    
+    def parseBlatFile(self):
+        blatFile = open(self._blatFileName, 'r')
+        line = blatFile.readline()
+        n = 1
+        while line != "":
+            if self._isInteger(line.split("\t")[0]):
+                iBlatParser = BlatParser()
+                iBlatParser.setAttributesFromString(line, n)
+                queryHeader = iBlatParser.getQName()
+                self._dQueries[queryHeader] = 1
+                self._lBlatHits.append(iBlatParser)
+            line = blatFile.readline()
+            n += 1
+        return self._lBlatHits
+    
+    def parseBlatFileByQueries(self):
+        blatFile = open(self._blatFileName, 'r')
+        line = blatFile.readline()
+        n = 1
+        while line != "":
+            if self._isInteger(line.split("\t")[0]):
+                iBlatParser = BlatParser()
+                iBlatParser.setAttributesFromString(line, n)
+                queryHeader = iBlatParser.getQName()
+                self._dQueries[queryHeader] = 1
+                if self._dBlatHitsByQueries.has_key(queryHeader):
+                    self._dBlatHitsByQueries[queryHeader].append(iBlatParser)
+                else:
+                    self._dBlatHitsByQueries[queryHeader] = [iBlatParser]
+            line = blatFile.readline()
+            n += 1
+        blatFile.close()
+        return self._dBlatHitsByQueries
+        
+    def _isInteger(self, string):
+        try:
+            int(string)
+            return True
+        except ValueError:
+            return False