annotate mytools/collapseTab.py @ 7:f0dc65e7f6c0

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:59:07 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
1 '''
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
2 collapse tabular files, with key columns, and max columns
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
3 '''
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
4
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
def collapseTab(filename,c_key,c_max):
    """Collapse a tab-delimited file, keeping one row per key.

    Rows are grouped by the values in the key columns. For each group the
    row with the largest numeric value in column ``c_max`` is kept; on ties
    the first such row wins. The surviving rows are printed to standard
    output, tab-joined, in the order their keys first appear in the file.
    Rows with fewer fields than the highest referenced column are skipped.

    Args:
        filename: path of the tab-delimited input file.
        c_key: sequence of 1-based key column numbers; it is not modified.
        c_max: 1-based number of the column whose maximum is kept.

    Raises:
        ValueError: if ``c_key`` is empty, if any column number is smaller
            than 1, or if a value in column ``c_max`` is not a number.
    """
    # Work on 0-based copies so the caller's list is left untouched.
    key_cols = [col - 1 for col in c_key]
    max_col = c_max - 1
    if not key_cols:
        raise ValueError('at least one key column is required')
    if max_col < 0 or min(key_cols) < 0:
        raise ValueError('column numbers are 1-based and must be >= 1')
    n_col = max(max(key_cols), max_col) + 1

    best = {}    # key tuple -> (best value so far, fields of that row)
    order = []   # keys in first-seen order, for deterministic output
    with open(filename) as handle:
        for line in handle:
            # Strip only the line terminator: a plain strip() would also eat
            # empty leading/trailing fields and shift the column positions.
            flds = line.rstrip('\r\n').split('\t')
            if len(flds) < n_col:
                continue
            # A tuple key cannot collide the way concatenated strings can
            # ('ab' + 'c' versus 'a' + 'bc').
            key = tuple(flds[i] for i in key_cols)
            value = float(flds[max_col])
            if key not in best:
                order.append(key)
                best[key] = (value, flds)
            elif best[key][0] < value:
                best[key] = (value, flds)

    for key in order:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('\t'.join(best[key][1]))
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
31
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
import sys

# Run the tool only when executed as a script, so the module can be imported
# without touching sys.argv.
if __name__ == '__main__':
    if len(sys.argv) < 4:
        sys.exit('usage: collapseTab.py <tabular file> '
                 '<key columns, comma-separated, 1-based> <max column, 1-based>')
    # Convert the comma-separated string to a list of column numbers. A real
    # list (not a lazy map object) is needed because the callee takes its
    # length and indexes it.
    c_key = [int(col) for col in sys.argv[2].split(',')]
    c_max = int(sys.argv[3])
    collapseTab(sys.argv[1], c_key, c_max)