diff commons/core/parsing/PathNum2Id.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/core/parsing/PathNum2Id.py	Fri Jan 18 04:54:14 2013 -0500
@@ -0,0 +1,47 @@
class PathNum2Id( object ):
    """
    Renumber the path identifiers of a tab-separated 'path' file.

    The input is meant to be the concatenation of several 'path' files, so
    the same path number can occur for unrelated matches. Every distinct
    (path number, query name, subject name) triple receives a new identifier,
    numbered from 1 in order of first appearance.

    Usage: call setInFileName() and setOutFileName(), then run().
    """

    def __init__(self):
        self._inFileName = None
        self._outFileName = None

    def setInFileName(self, fileName):
        """Set the name of the 'path' file to read."""
        self._inFileName = fileName

    def setOutFileName(self, fileName):
        """Set the name of the file to write (opened in "w" mode, so truncated)."""
        self._outFileName = fileName

    def run( self ):
        """
        Adapt the path IDs as the input file is the concatenation of several 'path' files.

        Each input line must hold at least ten tab-separated fields, read as:
        path, query name, query start, query end, subject name, subject start,
        subject end, BLAST E-value, BLAST score, percentage identity.
        The four coordinates are parsed as integers and written back in
        canonical decimal form; the last three fields are copied verbatim.
        Any field beyond the tenth is dropped. Blank lines are skipped.

        Raises:
            IndexError: if a non-blank line has fewer than ten fields.
            ValueError: if a coordinate field is not an integer.
            IOError/OSError: if either file cannot be opened.
        """
        # Maps (old path, query name, subject name) -> new path identifier.
        # A tuple is used rather than a "-"-joined string so that names which
        # themselves contain "-" can never collide with one another.
        dID2count = {}
        # 'with' guarantees both files are closed even if a line fails to parse.
        with open( self._inFileName, "r" ) as inFile, open( self._outFileName, "w" ) as outFile:
            for line in inFile:
                # Concatenated files often carry empty lines at the seams.
                if not line.strip():
                    continue
                data = line.rstrip("\n").split("\t")
                path = data[0]
                qryName = data[1]
                qryStart = int(data[2])
                qryEnd = int(data[3])
                sbjName = data[4]
                sbjStart = int(data[5])
                sbjEnd = int(data[6])
                BLAST_Eval = data[7]
                BLAST_score = data[8]
                percId = data[9]
                key_id = ( path, qryName, sbjName )
                if key_id not in dID2count:
                    # First time this triple is seen: hand out the next ID.
                    dID2count[ key_id ] = len( dID2count ) + 1
                newPath = dID2count[ key_id ]
                outLine = "%i\t%s\t%i\t%i\t%s\t%i\t%i\t%s\t%s\t%s\n" % ( newPath, qryName, qryStart, qryEnd, sbjName, sbjStart, sbjEnd, BLAST_Eval, BLAST_score, percId )
                outFile.write( outLine )