# HG changeset patch
# User xuebing
# Date 1333214744 14400
# Node ID ce94ba22a3b36abc480674f84ca6bf7bd2fdaf51
# Parent 082c1e8ff413a35fffd739451e7be92e17f7fb81
# Uploaded
#
# Contents of bed_collapse.py as added by this changeset
# (Sat Mar 31 13:25:44 2012 -0400), restored to one statement per line.
"""Collapse duplicate intervals in a tab-separated interval file.

Rows sharing the same first three columns and the same strand column are
reduced to the single row with the highest score; the survivors are written
to standard output.

Usage: bed_collapse.py <file> <strand_column> <score_column>

Column numbers are 1-based. Passing 0 for either column means "ignore it".
"""

import sys


def collapseInterval_strand(filename, c_strand, c_score):
    """Print, for each distinct interval, the row with the highest score.

    Args:
        filename: Path of a tab-separated file whose first three columns
            identify an interval.
        c_strand: 0-based index of the column that is appended to the first
            three columns to form the grouping key.
        c_score: 0-based index of the column compared (as a float) to decide
            which row of a group is kept.

    The first line is treated as a header and dropped unless it contains the
    substring 'chr'. Blank lines are skipped. When several rows of a group
    tie on score, the first one read is kept. Each surviving row is written
    to stdout as its tab-joined fields.

    Raises:
        IndexError: if a non-blank row has fewer columns than required.
        ValueError: if a score field cannot be parsed as a float.
    """
    # key -> (best score so far, fields of the row holding that score)
    best = {}
    with open(filename) as handle:
        for line_no, line in enumerate(handle):
            # Only the first line may be a header; it counts as data when it
            # looks like an interval row (contains 'chr').
            if line_no == 0 and 'chr' not in line:
                continue
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF)
                # instead of failing on a missing column.
                continue
            fields = line.split('\t')
            key = '\t'.join([fields[0], fields[1], fields[2], fields[c_strand]])
            score = float(fields[c_score])
            kept = best.get(key)
            # Strict '<' so that the earliest row wins a tie.
            if kept is None or kept[0] < score:
                best[key] = (score, fields)

    for key in best:
        sys.stdout.write('\t'.join(best[key][1]) + '\n')


def _main(argv):
    """Parse the command line and run the collapse."""
    if len(argv) < 4:
        sys.exit('usage: %s <file> <strand_column> <score_column>' % argv[0])
    strand_col = argv[2]
    score_col = argv[3]
    if strand_col == '0':
        # Ignore strand: reuse column 1, which is already part of the key, so
        # the strand no longer separates otherwise identical intervals.
        strand_col = 1
    if score_col == '0':
        # Ignore score: compare on column 2, which is identical within a
        # group, so the first row read for each interval is the one kept.
        score_col = 2
    # The command line is 1-based; the function takes 0-based indices.
    collapseInterval_strand(argv[1], int(strand_col) - 1, int(score_col) - 1)


if __name__ == '__main__':
    _main(sys.argv)