# view collapseBed2.py @ 11:b7f1d9f8f3bc
#
# Uploaded
# author xuebing
# date Sat, 10 Mar 2012 07:59:27 -0500
# parents
# children
# line wrap: on
# line source

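'''
Collapse duplicate intervals in a tab-delimited interval file.

Rows that share the same first three columns and the same strand column are
reduced to the single row with the highest score; the surviving rows are
printed to standard output.

Usage: python collapseBed2.py <interval_file> <strand_column> <score_column>
Column numbers are 1-based; pass 0 to ignore the strand or the score.
'''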

def collapseInterval_strand(filename,c_strand,c_score):
    '''Collapse duplicate intervals, keeping the highest-scoring row of each.

    Rows of the tab-delimited file ``filename`` are grouped by their first
    three columns plus column ``c_strand``.  Within each group only the row
    whose column ``c_score`` (parsed as a float) is largest is kept; on a tie
    the row seen first wins.  The kept rows are printed to standard output,
    tab-joined, one per line.

    The first line is dropped as a header unless it contains the substring
    'chr', in which case it is handled like any other data row.  Blank lines
    are skipped.

    Args:
        filename: path of the tab-delimited input file.
        c_strand: 0-based index of the column appended to the grouping key.
        c_score: 0-based index of the numeric column to maximise.

    Raises:
        IOError: if the file cannot be opened (OSError on Python 3).
        IndexError: if a non-blank row has too few columns.
        ValueError: if a score field cannot be parsed as a float.
    '''
    # key -> (best score so far, fields of the row holding that score)
    best = {}
    # The with-block closes the file even when a malformed row raises.
    with open(filename) as f:
        for lineno, line in enumerate(f):
            # Only the first line can be a header; it counts as data when it
            # contains 'chr'.
            if lineno == 0 and 'chr' not in line:
                continue
            stripped = line.strip()
            if not stripped:
                # Blank line (e.g. a trailing newline at end of file).
                continue
            flds = stripped.split('\t')
            key = '\t'.join([flds[0], flds[1], flds[2], flds[c_strand]])
            score = float(flds[c_score])
            # Strict '<' keeps the earliest row when scores are equal.
            if key not in best or best[key][0] < score:
                best[key] = (score, flds)

    for _, flds in best.values():
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('\t'.join(flds))
        
import sys

if __name__ == '__main__':
    # Usage: <script> <interval_file> <strand_column> <score_column>
    # Column numbers are 1-based; '0' means "ignore this column".
    if len(sys.argv) < 4:
        sys.exit('usage: %s <interval_file> <strand_column|0> <score_column|0>'
                 % sys.argv[0])

    # Ignoring strand: fall back to column 1, which is already part of the
    # grouping key, so it never splits a group further.
    strand_col = 1 if sys.argv[2] == '0' else int(sys.argv[2])
    # Ignoring score: fall back to column 2, which is also part of the key and
    # therefore equal for every row in a group, so the first row seen is kept.
    score_col = 2 if sys.argv[3] == '0' else int(sys.argv[3])

    # The function expects 0-based column indices.
    collapseInterval_strand(sys.argv[1], strand_col - 1, score_col - 1)