Mercurial > repos > xuebing > sharplabtool
comparison bedclean.py @ 14:76e1b1b21cce default tip
Deleted selected files
| author | xuebing | 
|---|---|
| date | Tue, 13 Mar 2012 19:05:10 -0400 | 
| parents | 292186c14b08 | 
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| 13:292186c14b08 | 14:76e1b1b21cce | 
|---|---|
| 1 import sys | |
| 2 | |
def readChrSize(filename):
    """Read a chromosome size table into a dictionary.

    Each non-blank line of *filename* must hold a chromosome name followed
    by its size, separated by whitespace.  Any further columns on the line
    are ignored and blank lines are skipped.

    Parameters:
        filename: path of the chromosome size table.

    Returns:
        A dict mapping chromosome name (str) to size (int).  If a name
        occurs more than once, the last occurrence wins.

    Raises:
        ValueError: if a non-blank line has fewer than two fields or its
            size field is not an integer.
    """
    chrSize = {}
    # 'with' guarantees the file is closed even when a line fails to parse.
    with open(filename) as f:
        for lineno, line in enumerate(f, 1):
            fields = line.split()
            if not fields:
                # Blank line (typically a trailing newline): nothing to record.
                continue
            if len(fields) < 2:
                raise ValueError('line %d of %s: expected "<chrom> <size>", got %r'
                                 % (lineno, filename, line))
            chrSize[fields[0]] = int(fields[1])
    return chrSize
| 11 | |
def cleanFile(filename, chrSize, outfile):
    """Copy the valid interval lines of a tab-delimited file to *outfile*.

    Every line of *filename* is split on tabs; the first three fields are
    taken as chromosome, start and end.  A line is written to *outfile*
    only if it has at least three fields, its chromosome is a key of
    *chrSize*, both coordinates are integers, neither is negative and the
    end does not exceed the chromosome size.  When start is greater than
    end the two fields are swapped before the checks and in the output.
    Every rejected line is reported on standard output together with its
    1-based line number and the reason.

    Parameters:
        filename: path of the input file.
        chrSize: dict mapping chromosome name to its size (int).
        outfile: path of the file to write; it is created or truncated.
    """
    # 'with' closes both files even if an unexpected error interrupts the loop.
    with open(filename) as f:
        with open(outfile, 'w') as out:
            for i, line in enumerate(f, 1):
                flds = line.strip().split('\t')
                if len(flds) < 3:
                    print('line %d incomplete line:\n%s' % (i, line))
                    continue
                if flds[0] not in chrSize:
                    print('line %d chromosome %s not found!\n%s' % (i, flds[0], line))
                    continue
                try:
                    start, end = int(flds[1]), int(flds[2])
                except ValueError:
                    # Report and skip, like every other kind of bad line,
                    # instead of aborting the whole clean-up.
                    print('line %d non-integer coordinates:\n%s' % (i, line))
                    continue
                if start > end:
                    # Swap the original text fields (not the parsed ints) so
                    # the output keeps the input's exact spelling.
                    flds[1], flds[2] = flds[2], flds[1]
                    start, end = end, start
                if start < 0:
                    # start <= end holds here, so this also covers a negative end.
                    print('line %d negative coordinates:\n%s' % (i, line))
                elif end > chrSize[flds[0]]:
                    print('line %d end larger than chr size:\n%s' % (i, line))
                else:
                    out.write('\t'.join(flds) + '\n')
| 36 | |
# Script entry point.  Command-line arguments, in order:
#   argv[1]  file whose lines are to be cleaned
#   argv[2]  chromosome size table, loaded with readChrSize
#   argv[3]  path of the cleaned output file
cleanFile(
    sys.argv[1],
    readChrSize(sys.argv[2]),
    sys.argv[3],
)
