annotate mytools/collapseBed2.py @ 9:87eb5c5ddfe9

Uploaded
author xuebing
date Fri, 09 Mar 2012 20:01:43 -0500
parents f0dc65e7f6c0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
1 '''
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
2 Collapse duplicate intervals: for each (chrom, start, end, strand) keep only the record with the highest score.
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
3 '''
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
4
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
def collapseInterval_strand(filename, c_strand, c_score):
    '''
    Collapse duplicate intervals in a tab-delimited file and print the result.

    Records are grouped by the first three columns plus the column at index
    c_strand.  For every group only the record with the largest value in
    column c_score is kept; on a tie the record seen first wins.  The kept
    records are printed to standard output, one tab-joined line per group,
    in the iteration order of the internal dict.

    filename -- path of the tab-delimited input file
    c_strand -- 0-based index of the column appended to the grouping key
    c_score  -- 0-based index of the numeric column to maximise

    The first line is treated as a data record only if it contains the
    substring 'chr'; otherwise it is assumed to be a header and dropped.
    Blank lines are ignored.

    Raises IndexError if a record has too few columns and ValueError if the
    score column is not a number.
    '''
    best_score = {}  # key -> highest score seen so far
    best_row = {}    # key -> fields of the record holding that score

    def consider(line):
        # Fold one input line into the running per-key maximum.
        flds = line.strip().split('\t')
        if flds == ['']:
            # Blank line (e.g. trailing newline at end of file): nothing to do.
            return
        key = '\t'.join([flds[0], flds[1], flds[2], flds[c_strand]])
        score = float(flds[c_score])
        # Strict '<' keeps the earliest record when scores are equal.
        if key not in best_score or best_score[key] < score:
            best_score[key] = score
            best_row[key] = flds

    # 'with' guarantees the file is closed even if a record fails to parse.
    with open(filename) as f:
        first = f.readline()
        if 'chr' in first:
            # No header present: the first line is already a data record.
            consider(first)
        for line in f:
            consider(line)

    for key in best_row:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('\t'.join(best_row[key]))
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
29
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
30 import sys
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
31
f0dc65e7f6c0 Uploaded
xuebing
parents:
diff changeset
# Command-line entry point.
# Usage: collapseBed2.py <file> <strand_column> <score_column>
# Column numbers are 1-based; '0' means "ignore this column".
if __name__ == '__main__':
    if len(sys.argv) < 4:
        sys.exit('usage: %s <file> <strand_column> <score_column>' % sys.argv[0])

    # Ignoring strand: reuse column 1, which is already part of the grouping
    # key, so it adds no extra distinction between records.
    strand_column = 1 if sys.argv[2] == '0' else int(sys.argv[2])

    # Ignoring score: reuse column 2, which is also part of the grouping key
    # and therefore identical within a group, so the first record is kept.
    score_column = 2 if sys.argv[3] == '0' else int(sys.argv[3])

    # Convert the 1-based column numbers to 0-based indices.
    collapseInterval_strand(sys.argv[1], strand_column - 1, score_column - 1)