comparison bedClean.py @ 11:b7f1d9f8f3bc

Uploaded
author xuebing
date Sat, 10 Mar 2012 07:59:27 -0500
parents
children
comparison
equal deleted inserted replaced
10:1558594a3c2e 11:b7f1d9f8f3bc
1 import sys
2
def readChrSize(filename):
    """Load chromosome sizes from a whitespace-delimited text file.

    Each non-blank line must start with a chromosome name followed by its
    size as an integer. Any further columns on the line are ignored, so
    files that carry extra fields after the size can be read as well.

    Args:
        filename: path of the chromosome-size file.

    Returns:
        dict mapping chromosome name (str) to size (int). If a name occurs
        more than once, the last occurrence wins.

    Raises:
        ValueError: if a non-blank line has fewer than two columns or its
            second column is not an integer.
    """
    chrSize = {}
    # 'with' guarantees the handle is closed even when a line fails to parse.
    with open(filename) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline at EOF) carry no data.
                continue
            # Slicing keeps the strict "need two columns" check (unpacking
            # still fails on a single column) while tolerating extras.
            chrom, size = fields[:2]
            chrSize[chrom] = int(size)
    return chrSize
11
def cleanFile(filename, chrSize, outfile):
    """Copy the valid lines of a tab-delimited interval file to *outfile*.

    Every input line is split on tabs; column 1 is the chromosome name and
    columns 2 and 3 are integer coordinates. A line is written to *outfile*
    only if all of the following hold:

    * it has at least three tab-separated columns,
    * its chromosome is a key of *chrSize*,
    * both coordinates parse as integers and are non-negative,
    * the larger coordinate does not exceed ``chrSize[chromosome]``.

    If the second column is larger than the third, the two are swapped
    before validation and the swapped order is what gets written. Rejected
    lines are not written; instead a diagnostic with the 1-based line
    number and the offending line is sent to standard output.

    Args:
        filename: path of the input file.
        chrSize: dict mapping chromosome name to its size (int).
        outfile: path of the output file; it is created or truncated.
    """

    def report(lineno, message, line):
        # sys.stdout.write instead of the print statement so the function
        # runs under both Python 2 and Python 3 with the same message text.
        sys.stdout.write('line %d %s\n%s\n' % (lineno, message, line))

    # Both handles are closed on every exit path, including exceptions.
    with open(filename) as f, open(outfile, 'w') as out:
        for i, line in enumerate(f, 1):
            flds = line.strip().split('\t')
            if len(flds) < 3:
                report(i, 'incomplete line:', line)
                continue

            chrom = flds[0]
            if chrom not in chrSize:
                report(i, 'chromosome %s not found!' % chrom, line)
                continue

            # Parse once; a non-numeric field (e.g. a column-header row)
            # is reported like any other bad line instead of aborting.
            try:
                start, end = int(flds[1]), int(flds[2])
            except ValueError:
                report(i, 'non-integer coordinates:', line)
                continue

            if start > end:
                # Reversed interval: emit it with start <= end.
                start, end = end, start
                flds[1], flds[2] = flds[2], flds[1]

            # After the swap start <= end, so checking start alone covers
            # "either coordinate is negative".
            if start < 0:
                report(i, 'negative coordinates:', line)
            elif end > chrSize[chrom]:
                report(i, 'end larger than chr size:', line)
            else:
                out.write('\t'.join(flds) + '\n')
36
if __name__ == '__main__':
    # Command line: <input file> <chromosome-size file> <output file>.
    # Guarded so that importing this module does not start a cleaning run.
    if len(sys.argv) < 4:
        # sys.exit with a string prints it to stderr and exits with status 1,
        # which is clearer than the IndexError a missing argument would raise.
        sys.exit('usage: %s <input.bed> <chrom_sizes> <output.bed>'
                 % sys.argv[0])
    cleanFile(sys.argv[1], readChrSize(sys.argv[2]), sys.argv[3])