Mercurial > repos > xuebing > sharplab_interval_analysis
view bedClean.py @ 24:8dd2a3f51c42 draft default tip
Deleted selected files
author | xuebing |
---|---|
date | Wed, 09 May 2012 10:53:44 -0400 |
parents | 16ba480adf96 |
children |
line wrap: on
line source
import sys


def readChrSize(filename):
    """Read a chromosome-size table into a dict.

    Each non-blank line of *filename* must hold a chromosome name followed by
    its size, separated by whitespace.  Only the first two columns are used,
    so files with extra columns are accepted.  Blank lines are skipped.

    Returns a dict mapping chromosome name (str) to size (int).
    Raises ValueError if a non-blank line has fewer than two columns or the
    size is not an integer.
    """
    chrSize = {}
    with open(filename) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate empty lines (e.g. a trailing newline at end of file).
                continue
            chrom, size = fields[:2]
            chrSize[chrom] = int(size)
    return chrSize


def _report(line_no, message, line):
    """Write a diagnostic for input line *line_no* followed by the raw line.

    The layout reproduces what the Python 2 ``print`` statement emitted:
    the message on one line, then the offending line, then a newline.
    """
    sys.stdout.write('line %d %s\n%s\n' % (line_no, message, line))


def cleanFile(filename, chrSize, outfile):
    """Copy the valid intervals of a tab-separated BED file to *outfile*.

    For every line of *filename*:

    * lines with fewer than three tab-separated fields are reported and dropped;
    * lines whose chromosome is not a key of *chrSize* are reported and dropped;
    * if start > end, the two coordinates are swapped;
    * lines with a negative coordinate, or an end beyond the chromosome size,
      are reported and dropped;
    * a strand column (6th field) equal to '*' is rewritten as '+' and reported.

    Lines that survive are written to *outfile* joined by tabs.  Diagnostics
    go to standard output.  Raises ValueError if a coordinate is not an
    integer.
    """
    with open(filename) as f, open(outfile, 'w') as out:
        for i, line in enumerate(f, 1):
            flds = line.strip().split('\t')
            if len(flds) < 3:
                _report(i, 'incomplete line:', line)
                continue

            chrom = flds[0]
            if chrom not in chrSize:
                _report(i, 'chromosome %s not found!' % chrom, line)
                continue

            # Parse once; the original text of each field is what gets written.
            start = int(flds[1])
            end = int(flds[2])
            if start > end:
                flds[1], flds[2] = flds[2], flds[1]
                start, end = end, start

            if start < 0 or end < 0:
                _report(i, 'negative coordinates:', line)
            elif end > chrSize[chrom]:
                _report(i, 'end larger than chr size:', line)
            else:
                # The strand column is optional in BED; only touch it when it
                # is present, so BED3/BED4/BED5 lines do not raise IndexError.
                if len(flds) > 5 and flds[5] == '*':
                    flds[5] = '+'
                    _report(i, ' strand * changed to +', line)
                out.write('\t'.join(flds) + '\n')


if __name__ == '__main__':
    # Guarded so that importing this module does not run (or exit) the tool.
    if len(sys.argv) < 4:
        sys.stdout.write("python bedClean.py in.bed chrsizefile out.bed\n")
        sys.exit()
    cleanFile(sys.argv[1], readChrSize(sys.argv[2]), sys.argv[3])