comparison mytools/collapseBed.py @ 7:f0dc65e7f6c0

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:59:07 -0500
parents
children
comparison
equal deleted inserted replaced
6:094f377db4e5 7:f0dc65e7f6c0
1 '''
2 collapse intervals
3 '''
4
def collapseInterval_strand(filename):
    '''
    Collapse duplicate intervals, treating strand as part of the identity.

    Reads a tab-separated file and groups its lines by columns 1, 2, 3 and 6
    (fields 0, 1, 2 and 5). For every distinct group, prints the fields of
    the first line seen for that group followed by the number of lines in
    the group, tab-separated, in first-seen order.

    The first line is only treated as data when it contains the substring
    'chr'; otherwise it is discarded as a header.
    NOTE(review): a header such as '#chrom' also contains 'chr' and would be
    parsed as data - confirm the expected header formats.

    Blank lines are skipped. A data line with fewer than six fields raises
    IndexError.
    '''
    counts = {}      # key -> number of lines sharing that key
    first_seen = {}  # key -> fields of the first line with that key
    order = []       # keys in first-seen order, for deterministic output

    def _add(line):
        flds = line.strip().split('\t')
        key = '\t'.join([flds[0], flds[1], flds[2], flds[5]])
        if key in counts:
            counts[key] += 1
        else:
            counts[key] = 1
            first_seen[key] = flds
            order.append(key)

    # 'with' guarantees the file is closed even if a malformed line raises.
    with open(filename) as f:
        header = f.readline()
        if 'chr' in header:
            _add(header)
        for line in f:
            # A blank line has no fields to index; skip it.
            if line.strip():
                _add(line)

    for key in order:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('\t'.join(first_seen[key] + [str(counts[key])]))
28
def collapseInterval(filename):
    '''
    Collapse duplicate intervals, ignoring strand.

    Reads a tab-separated file and groups its lines by the first three
    columns (fields 0, 1 and 2). For every distinct group, prints the fields
    of the first line seen for that group followed by the number of lines in
    the group, tab-separated, in first-seen order.

    The first line is only treated as data when it contains the substring
    'chr'; otherwise it is discarded as a header.
    NOTE(review): a header such as '#chrom' also contains 'chr' and would be
    parsed as data - confirm the expected header formats.

    Blank lines are skipped. A data line with fewer than three fields raises
    IndexError.
    '''
    counts = {}      # key -> number of lines sharing that key
    first_seen = {}  # key -> fields of the first line with that key
    order = []       # keys in first-seen order, for deterministic output

    def _add(line):
        flds = line.strip().split('\t')
        key = '\t'.join([flds[0], flds[1], flds[2]])
        if key in counts:
            counts[key] += 1
        else:
            counts[key] = 1
            first_seen[key] = flds
            order.append(key)

    # 'with' guarantees the file is closed even if a malformed line raises.
    with open(filename) as f:
        header = f.readline()
        if 'chr' in header:
            _add(header)
        for line in f:
            # A blank line has no fields to index; skip it.
            if line.strip():
                _add(line)

    for key in order:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('\t'.join(first_seen[key] + [str(counts[key])]))
52
53 import sys
54
# Command line: <input file> [mode]. A mode of 'strand' collapses intervals
# per strand; any other value, or no mode at all, collapses ignoring strand.
# The length check keeps an omitted mode from raising IndexError.
if len(sys.argv) > 2 and sys.argv[2] == 'strand':
    collapseInterval_strand(sys.argv[1])
else:
    collapseInterval(sys.argv[1])