annotate tools/regVariation/windowSplitter.py @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 Split into windows.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 usage: %prog input size out_file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 import sys, re, os
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 from galaxy import eggs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 import pkg_resources; pkg_resources.require( "bx-python" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 from bx.cookbook import doc_optparse
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 from galaxy.tools.util.galaxyops import *
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 def stop_err( msg ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 sys.stderr.write( msg )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 sys.exit()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 def main():
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 # Parsing Command Line here
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 options, args = doc_optparse.parse( __doc__ )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 inp_file, winsize, out_file, makesliding, offset = args
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 winsize = int(winsize)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 offset = int(offset)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 makesliding = int(makesliding)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 if strand_col_1 <= 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 strand = "+" #if strand is not defined, default it to +
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 stop_err( "Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset." )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 fo = open(out_file,'w')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 skipped_lines = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 first_invalid_line = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 invalid_line = None
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 if offset == 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 makesliding = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 for i, line in enumerate( file( inp_file ) ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 line = line.strip()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 if line and line[0:1] != "#":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 elems = line.split('\t')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 if strand_col_1 != -1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 strand = elems[strand_col_1]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 start = int(elems[start_col_1])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 end = int(elems[end_col_1])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 if makesliding == 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 numwin = (end - start)/winsize
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 numwin = (end - start)/offset
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 if numwin > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 for win in range(numwin):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 elems_1 = elems
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 elems_1[start_col_1] = str(start)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 elems_1[end_col_1] = str(start + winsize)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 fo.write( "%s\n" % '\t'.join( elems_1 ) )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 if makesliding == 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 start = start + winsize
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 start = start + offset
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 if start+winsize > end:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 break
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 skipped_lines += 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 if not invalid_line:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 first_invalid_line = i + 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 invalid_line = line
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 fo.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 if makesliding == 1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 print 'Window size=%d, Sliding=Yes, Offset=%d' %(winsize, offset)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 print 'Window size=%d, Sliding=No' %(winsize)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 if skipped_lines > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 print 'Skipped %d invalid lines starting with #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 if __name__ == "__main__":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 main()