diff collapseTab.py @ 11:b7f1d9f8f3bc

Uploaded
author xuebing
date Sat, 10 Mar 2012 07:59:27 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/collapseTab.py	Sat Mar 10 07:59:27 2012 -0500
@@ -0,0 +1,37 @@
+'''
+Collapse a tab-separated file: for each distinct key (one or more key columns), keep the row with the largest value in the max column.
+'''
+
+def collapseTab(filename,c_key,c_max):
+    # keeping rows with max value in column c_max
+    nCol = max(max(c_key),c_max)
+    c_max = c_max - 1
+    for i in range(len(c_key)):
+        c_key[i] = c_key[i] - 1
+    uniqintv = {}
+    data = {}
+    f = open(filename)
+    for line in f:
+        flds = line.strip().split('\t')
+        if len(flds) < nCol:
+            continue
+        key = ''
+        for i in c_key:
+            key = key + flds[i-1] # i is 1-based, python is 0-based
+        if not uniqintv.has_key(key):
+            uniqintv[key] = float(flds[c_max])
+            data[key] = flds
+        elif uniqintv[key] < float(flds[c_max]):
+            uniqintv[key] = float(flds[c_max])
+            data[key] = flds
+
+    f.close()        
+    for key in uniqintv.keys():
+        print '\t'.join(data[key])
+        
+import sys
+
+# convert string to number list
+c_key = map(int,sys.argv[2].split(','))
+c_max = int(sys.argv[3])
+collapseTab(sys.argv[1],c_key,c_max)