0
|
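'''
Collapse duplicate intervals in a tab-delimited file.

Each distinct interval is printed once to stdout, using the fields of the
first line on which it was seen, with its number of occurrences appended as
an extra last column.

Command-line arguments: <interval_file> <mode>. When <mode> is 'strand' the
intervals are keyed on columns 1, 2, 3 and 6; for any other value they are
keyed on columns 1, 2 and 3 only.
'''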
|
|
4
|
|
def collapseInterval_strand(filename):
    """Collapse duplicate strand-specific intervals and print their counts.

    Reads a tab-delimited file and groups its lines by columns 1, 2, 3 and 6
    (presumably chrom, start, end and strand of a BED-like file -- confirm
    against the real input format).  For every distinct key, the fields of
    the first line seen with that key are printed to stdout, tab-joined,
    with the number of occurrences appended as an extra last column.

    The first line is counted as data only when it contains the substring
    'chr'; otherwise it is treated as a header and skipped.  Blank lines are
    ignored.

    Args:
        filename: Path of the tab-delimited interval file.

    Raises:
        IndexError: If a non-blank line has fewer than six columns.
    """
    # key -> [fields of the first line seen with this key, occurrence count]
    collapsed = {}

    # `with` guarantees the file is closed even if a malformed line raises.
    with open(filename) as f:
        for lineno, line in enumerate(f):
            # Only the first line may be a header; it is data if it has 'chr'.
            if lineno == 0 and 'chr' not in line:
                continue

            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no interval.
                continue

            flds = stripped.split('\t')
            key = '\t'.join([flds[0], flds[1], flds[2], flds[5]])
            if key in collapsed:
                collapsed[key][1] += 1
            else:
                collapsed[key] = [flds, 1]

    for key in collapsed:
        flds, count = collapsed[key]
        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3.
        print('\t'.join(flds + [str(count)]))
|
|
28
|
|
def collapseInterval(filename):
    """Collapse duplicate intervals (ignoring strand) and print their counts.

    Reads a tab-delimited file and groups its lines by columns 1, 2 and 3
    (presumably chrom, start and end of a BED-like file -- confirm against
    the real input format).  For every distinct key, the fields of the first
    line seen with that key are printed to stdout, tab-joined, with the
    number of occurrences appended as an extra last column.

    The first line is counted as data only when it contains the substring
    'chr'; otherwise it is treated as a header and skipped.  Blank lines are
    ignored.

    Args:
        filename: Path of the tab-delimited interval file.

    Raises:
        IndexError: If a non-blank line has fewer than three columns.
    """
    # key -> [fields of the first line seen with this key, occurrence count]
    collapsed = {}

    # `with` guarantees the file is closed even if a malformed line raises.
    with open(filename) as f:
        for lineno, line in enumerate(f):
            # Only the first line may be a header; it is data if it has 'chr'.
            if lineno == 0 and 'chr' not in line:
                continue

            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) carry no interval.
                continue

            flds = stripped.split('\t')
            key = '\t'.join([flds[0], flds[1], flds[2]])
            if key in collapsed:
                collapsed[key][1] += 1
            else:
                collapsed[key] = [flds, 1]

    for key in collapsed:
        flds, count = collapsed[key]
        # Parenthesised single-argument print behaves the same on Python 2
        # and Python 3.
        print('\t'.join(flds + [str(count)]))
|
|
52
|
|
53 import sys
|
|
54
|
|
# Command-line entry point: <script> <interval_file> [strand]
if len(sys.argv) < 2:
    # Without an input file there is nothing to do; explain instead of
    # failing with a bare IndexError.
    sys.stderr.write('usage: %s <interval_file> [strand]\n' % sys.argv[0])
    sys.exit(1)

# The mode argument is optional: only the literal 'strand' selects the
# strand-aware collapse; any other value, or no value, ignores strand.
if len(sys.argv) > 2 and sys.argv[2] == 'strand':
    collapseInterval_strand(sys.argv[1])
else:
    collapseInterval(sys.argv[1])
|