Mercurial > repos > xuebing > sharplab_seq_motif
view mytools/bedClean.py @ 0:39217fa39ff2
Uploaded
author | xuebing |
---|---|
date | Tue, 13 Mar 2012 23:34:52 -0400 |
parents | |
children |
line wrap: on
line source
"""Filter a BED file down to intervals that fit a chromosome-size table.

Usage: bedClean.py <input.bed> <chrom.sizes> <output.bed>

Rejected lines are reported on standard output; accepted lines are written
to the output file with start/end swapped when they were given in reverse.
"""
import sys


def readChrSize(filename):
    """Load a chromosome-size table into a dict.

    Each non-blank line must hold a chromosome name followed by its size,
    separated by whitespace. Columns after the second are ignored and blank
    lines are skipped.

    Args:
        filename: path of the chromosome-size file.

    Returns:
        dict mapping chromosome name (str) to size (int).

    Raises:
        ValueError: a line has fewer than two columns, or its size column
            is not an integer.
    """
    chrSize = {}
    # 'with' guarantees the handle is closed even when a line fails to parse.
    with open(filename) as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue
            if len(fields) < 2:
                raise ValueError(
                    'malformed chromosome size line: %r' % line)
            chrSize[fields[0]] = int(fields[1])
    return chrSize


def _report(line_no, message, line):
    """Write one diagnostic to stdout: a header line, then the raw input line."""
    # 'line' normally still carries its own newline, so a blank line follows
    # each report; this matches the script's established output format.
    sys.stdout.write('line %d %s\n%s\n' % (line_no, message, line))


def cleanFile(filename, chrSize, outfile):
    """Copy the valid intervals of a tab-separated BED file to ``outfile``.

    A line is kept when it has at least three tab-separated fields, its
    chromosome is a key of ``chrSize``, both coordinates are non-negative
    integers, and the end does not exceed the chromosome size. If start is
    greater than end the two are swapped before the checks and in the output.
    Every rejected line is reported on stdout with its 1-based line number.

    Args:
        filename: path of the input BED file.
        chrSize: dict mapping chromosome name to size, as returned by
            ``readChrSize``.
        outfile: path of the file to write the kept lines to (overwritten).
    """
    with open(filename) as f:
        with open(outfile, 'w') as out:
            for i, line in enumerate(f, 1):
                flds = line.strip().split('\t')
                if len(flds) < 3:
                    _report(i, 'incomplete line:', line)
                    continue
                if flds[0] not in chrSize:
                    _report(i, 'chromosome %s not found!' % flds[0], line)
                    continue
                try:
                    start, end = int(flds[1]), int(flds[2])
                except ValueError:
                    # Report and skip, like every other bad line, rather than
                    # aborting with a half-written output file.
                    _report(i, 'non-integer coordinates:', line)
                    continue
                if start > end:
                    # Swap the original text fields so the output keeps the
                    # exact spelling of each number.
                    flds[1], flds[2] = flds[2], flds[1]
                    start, end = end, start
                # start <= end holds here, so start >= 0 implies end >= 0.
                if start < 0:
                    _report(i, 'negative coordinates:', line)
                elif end > chrSize[flds[0]]:
                    _report(i, 'end larger than chr size:', line)
                else:
                    out.write('\t'.join(flds) + '\n')


if __name__ == '__main__':
    cleanFile(sys.argv[1], readChrSize(sys.argv[2]), sys.argv[3])