annotate collapseBed.py @ 13:292186c14b08

Uploaded
author xuebing
date Sat, 10 Mar 2012 08:17:36 -0500
parents b7f1d9f8f3bc
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
1 '''
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
2 collapse intervals
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
3 '''
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
4
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
def collapseInterval_strand(filename):
    """Collapse duplicate intervals in a tab-delimited file, strand-aware.

    Two records count as duplicates when their 1st, 2nd, 3rd and 6th
    tab-separated columns are identical (presumably chrom, start, end and
    strand of a BED file -- confirm against the caller's input format).
    For every distinct key, the first record seen is printed to stdout
    with one extra trailing column: the number of records sharing the key.
    Records are printed in the order their key first appears in the file.

    The first line is treated as data only when it contains the substring
    'chr'; otherwise it is assumed to be a header and is dropped.

    Args:
        filename: path of the tab-delimited input file.

    Raises:
        IndexError: a non-blank data line has fewer than 6 columns.
    """
    counts = {}        # key -> number of records carrying that key
    first_record = {}  # key -> fields of the first record with that key
    order = []         # keys in first-seen order, for deterministic output

    # 'with' guarantees the handle is closed even if a malformed line raises.
    with open(filename) as handle:
        for lineno, line in enumerate(handle):
            # Only the very first line may be a header; it is kept as data
            # when it looks like an interval (contains 'chr').
            if lineno == 0 and 'chr' not in line:
                continue
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline at EOF) carry no
                # interval and would otherwise fail on the column lookup.
                continue
            fields = stripped.split('\t')
            key = '\t'.join([fields[0], fields[1], fields[2], fields[5]])
            if key in counts:
                counts[key] += 1
            else:
                counts[key] = 1
                first_record[key] = fields
                order.append(key)

    for key in order:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('\t'.join(first_record[key] + [str(counts[key])]))
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
28
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
def collapseInterval(filename):
    """Collapse duplicate intervals in a tab-delimited file, ignoring strand.

    Two records count as duplicates when their first three tab-separated
    columns are identical (presumably chrom, start and end of a BED file --
    confirm against the caller's input format). For every distinct key,
    the first record seen is printed to stdout with one extra trailing
    column: the number of records sharing the key. Records are printed in
    the order their key first appears in the file.

    The first line is treated as data only when it contains the substring
    'chr'; otherwise it is assumed to be a header and is dropped.

    Args:
        filename: path of the tab-delimited input file.

    Raises:
        IndexError: a non-blank data line has fewer than 3 columns.
    """
    counts = {}        # key -> number of records carrying that key
    first_record = {}  # key -> fields of the first record with that key
    order = []         # keys in first-seen order, for deterministic output

    # 'with' guarantees the handle is closed even if a malformed line raises.
    with open(filename) as handle:
        for lineno, line in enumerate(handle):
            # Only the very first line may be a header; it is kept as data
            # when it looks like an interval (contains 'chr').
            if lineno == 0 and 'chr' not in line:
                continue
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline at EOF) carry no
                # interval and would otherwise fail on the column lookup.
                continue
            fields = stripped.split('\t')
            key = '\t'.join([fields[0], fields[1], fields[2]])
            if key in counts:
                counts[key] += 1
            else:
                counts[key] = 1
                first_record[key] = fields
                order.append(key)

    for key in order:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('\t'.join(first_record[key] + [str(counts[key])]))
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
52
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
53 import sys
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
54
b7f1d9f8f3bc Uploaded
xuebing
parents:
diff changeset
# Command-line entry point: <script> <interval_file> [strand]
# Guarded so that importing this module does not run the collapse.
if __name__ == '__main__':
    if len(sys.argv) < 2:
        # No input file given: report usage instead of failing with IndexError.
        sys.stderr.write('usage: %s <interval_file> [strand]\n' % sys.argv[0])
        sys.exit(1)
    # The mode argument is optional; anything other than the literal
    # 'strand' (including its absence) selects the strand-agnostic collapse.
    if len(sys.argv) > 2 and sys.argv[2] == 'strand':
        collapseInterval_strand(sys.argv[1])
    else:
        collapseInterval(sys.argv[1])