Mercurial > repos > xuebing > sharplab_seq_motif
comparison mytools/bedClean.py @ 0:39217fa39ff2
Uploaded
author | xuebing |
---|---|
date | Tue, 13 Mar 2012 23:34:52 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:39217fa39ff2 |
---|---|
1 import sys | |
2 | |
def readChrSize(filename):
    """Load chromosome sizes from a whitespace-delimited two-column file.

    Each non-blank line must contain exactly two fields: a chromosome
    name followed by its size as an integer. Blank lines are skipped.

    Args:
        filename: path of the chromosome-size file to read.

    Returns:
        A dict mapping chromosome name (str) to size (int).

    Raises:
        ValueError: if a non-blank line does not have exactly two fields,
            or if the size field is not an integer.
    """
    chrSize = {}
    # The context manager closes the file even if a line fails to parse.
    with open(filename) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # A blank (often trailing) line carries no data; skipping it
                # avoids an unpacking error on an otherwise valid file.
                continue
            chrom, size = fields
            chrSize[chrom] = int(size)
    return chrSize
11 | |
def cleanFile(filename, chrSize, outfile):
    """Copy the valid interval lines of a tab-delimited file to outfile.

    A line is written to ``outfile`` when it has at least three
    tab-separated fields, its first field is a key of ``chrSize``, its
    second and third fields are integers, neither is negative, and the
    larger of the two does not exceed the size recorded for that
    chromosome. If the second field is greater than the third, the two
    are swapped before the line is written. Every rejected line is
    reported on standard output together with its 1-based line number
    and the reason; processing then continues with the next line.

    Args:
        filename: path of the input file.
        chrSize: dict mapping chromosome name to its size (int).
        outfile: path of the output file; it is created or overwritten.
    """
    # Both handles are released even if reading or writing fails part-way.
    with open(filename) as f, open(outfile, 'w') as out:
        for i, line in enumerate(f, 1):
            flds = line.strip().split('\t')
            if len(flds) < 3:
                problem = 'incomplete line:'
            elif flds[0] not in chrSize:
                problem = 'chromosome %s not found!' % flds[0]
            else:
                try:
                    start, end = int(flds[1]), int(flds[2])
                except ValueError:
                    # Report and skip (e.g. a column-header row) instead of
                    # aborting the whole run on one bad line.
                    problem = 'non-integer coordinates:'
                else:
                    if start > end:
                        # Swap the original text fields so the output keeps
                        # the input's exact number formatting.
                        flds[1], flds[2] = flds[2], flds[1]
                        start, end = end, start
                    if start < 0:
                        # start <= end here, so this also covers a
                        # negative end coordinate.
                        problem = 'negative coordinates:'
                    elif end > chrSize[flds[0]]:
                        problem = 'end larger than chr size:'
                    else:
                        problem = None
            if problem is None:
                out.write('\t'.join(flds) + '\n')
            else:
                # A single pre-formatted string keeps this call valid as both
                # a Python 2 print statement and a Python 3 print() call.
                print('line %d %s\n%s' % (i, problem, line))
36 | |
if __name__ == '__main__':
    # Command-line entry point. Arguments, in order: the file to clean,
    # the chromosome-size file, and the output path. The guard keeps the
    # functions importable without triggering a run.
    if len(sys.argv) < 4:
        sys.exit('usage: %s <input_file> <chrom_size_file> <output_file>'
                 % sys.argv[0])
    cleanFile(sys.argv[1], readChrSize(sys.argv[2]), sys.argv[3])