diff mytools/collapseBed.py @ 7:f0dc65e7f6c0

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:59:07 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mytools/collapseBed.py	Fri Mar 09 19:59:07 2012 -0500
@@ -0,0 +1,58 @@
+'''
+Collapse duplicate intervals in a tab-delimited BED file, printing each once with a count.
+'''
+
+def collapseInterval_strand(filename):
+    '''Print each distinct (chrom, start, end, strand) row once, with its count.
+
+    filename: tab-delimited BED file; columns 1, 2, 3 and 6 form the key. The
+    first line is dropped as a header unless it contains 'chr'; blank lines are
+    skipped. For each key the first row seen is printed, in input order, with the
+    occurrence count appended. Raises IndexError on rows with < 6 columns.
+    '''
+    counts, first = {}, {}
+    order = []  # keys in first-seen order, so the output order is deterministic
+    with open(filename) as f:  # closed even if a malformed row raises
+        for lineno, line in enumerate(f):
+            if (lineno == 0 and 'chr' not in line) or not line.strip():
+                continue  # header line or blank line
+            flds = line.strip().split('\t')
+            key = '\t'.join([flds[0], flds[1], flds[2], flds[5]])
+            if key not in counts:
+                counts[key] = 0
+                first[key] = flds
+                order.append(key)
+            counts[key] += 1
+    for key in order:
+        print('\t'.join(first[key] + [str(counts[key])]))
+
+def collapseInterval(filename):
+    '''Print each distinct (chrom, start, end) row once, with its count.
+
+    filename: tab-delimited BED file; the first three columns form the key
+    (strand is ignored). The first line is dropped as a header unless it
+    contains 'chr'; blank lines are skipped. For each key the first row seen is
+    printed, in input order, with the occurrence count appended as a new column.
+    Raises IndexError on non-blank rows with fewer than 3 columns.
+    '''
+    counts, first = {}, {}
+    order = []  # keys in first-seen order, so the output order is deterministic
+    with open(filename) as f:  # closed even if a malformed row raises
+        for lineno, line in enumerate(f):
+            if (lineno == 0 and 'chr' not in line) or not line.strip():
+                continue  # header line or blank line
+            flds = line.strip().split('\t')
+            key = '\t'.join(flds[:3]) if len(flds) > 2 else flds[2]
+            if key not in counts:
+                counts[key] = 0
+                first[key] = flds
+                order.append(key)
+            counts[key] += 1
+    for key in order:
+        print('\t'.join(first[key] + [str(counts[key])]))
+
+import sys
+# Usage: collapseBed.py <bed_file> [strand] -- the mode argument is optional.
+if len(sys.argv) > 2 and sys.argv[2] == 'strand':
+    collapseInterval_strand(sys.argv[1])
+else:
+    collapseInterval(sys.argv[1])