1
|
1 '''
|
|
2 collapse intervals
|
|
3 '''
|
|
4
|
|
def collapseInterval_strand(filename, c_strand, c_score):
    """Print one row per unique interval, keeping the highest-scoring row.

    Rows of the tab-separated file are grouped by the key
    (column 0, column 1, column 2, column ``c_strand``).  For each key the
    row whose ``c_score`` column has the largest float value is kept; on a
    tie the row seen first wins.  The surviving rows are printed to stdout,
    tab-joined.

    Args:
        filename: path of the tab-separated input file.
        c_strand: 0-based index of the column used as the 4th key field.
        c_score: 0-based index of the column holding the numeric score.

    Raises:
        ValueError: if a score on any line but the first is not a number.
        IndexError: if a non-blank row has too few columns.
    """
    # key -> (best score so far, fields of the row that achieved it)
    best = {}

    # ``with`` guarantees the handle is closed even if a row fails to parse.
    with open(filename) as handle:
        for lineno, line in enumerate(handle):
            # The first line is only treated as data when it mentions 'chr';
            # otherwise it is assumed to be a header and dropped.
            if lineno == 0 and 'chr' not in line:
                continue

            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no interval.
                continue

            fields = stripped.split('\t')
            key = '\t'.join([fields[0], fields[1], fields[2], fields[c_strand]])

            try:
                score = float(fields[c_score])
            except ValueError:
                if lineno == 0:
                    # A header such as "chrom\tstart\t..." contains 'chr'
                    # but has no numeric score: skip it instead of crashing.
                    continue
                raise

            # Strict '>' keeps the earliest row when scores are equal.
            if key not in best or score > best[key][0]:
                best[key] = (score, fields)

    for _, fields in best.values():
        print('\t'.join(fields))
|
|
29
|
|
30 import sys
|
|
31
|
|
if __name__ == '__main__':
    # Command line: <filename> <strand_col> <score_col>, columns 1-based,
    # with '0' meaning "ignore this column".
    if len(sys.argv) < 4:
        sys.exit('usage: %s <filename> <strand_col> <score_col>  '
                 '(1-based columns; 0 = ignore)' % sys.argv[0])

    # Ignore strand: reuse column 1 as the 4th key field, so the key depends
    # only on the first three columns.
    strand_col = 1 if sys.argv[2] == '0' else int(sys.argv[2])

    # Ignore score: use column 2 as the score.  It is already part of the
    # key, so rows with the same key always tie and the first one is kept.
    score_col = 2 if sys.argv[3] == '0' else int(sys.argv[3])

    # The function expects 0-based column indices.
    collapseInterval_strand(sys.argv[1], strand_col - 1, score_col - 1)
|