0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 """
|
|
4 Split into windows.
|
|
5
|
|
6 usage: %prog input size out_file
|
|
7 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
|
|
8 """
|
|
9
|
|
10 import sys
|
|
11
|
|
12 from galaxy import eggs
|
|
13 import pkg_resources
|
|
14 pkg_resources.require( "bx-python" )
|
|
15 from bx.cookbook import doc_optparse
|
|
16 from galaxy.tools.util.galaxyops import *
|
|
17
|
|
def stop_err(msg):
    """Report a fatal error and terminate the program.

    Writes ``msg`` to standard error exactly as given (no newline is added)
    and then raises ``SystemExit`` with a non-zero status.

    msg -- the text to write to standard error.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() exits with status 0, which tells the caller that the
    # run succeeded even though an error was just reported.
    sys.exit(1)
|
|
21
|
|
22
|
|
def _iter_windows(start, end, winsize, step):
    """Yield (window_start, window_end) for each window inside [start, end]."""
    # Floor division keeps the count an integer on Python 2 and Python 3.
    for _ in range((end - start) // step):
        # Check before emitting so that no window ever runs past ``end``,
        # including the very first one.
        if start + winsize > end:
            break
        yield start, start + winsize
        start += step


def main():
    """Split every interval of a tab-separated file into fixed-size windows.

    The command line is parsed from the module docstring.  The ``cols``
    option supplies the chrom/start/end/strand columns, and five positional
    arguments are expected, in this order: input file, window size, output
    file, sliding flag and offset.

    For each non-empty, non-comment input line the start and end columns are
    read as integers and one output line is written per window, identical to
    the input line except for the start and end columns.  Consecutive windows
    begin ``winsize`` apart, or ``offset`` apart when the sliding flag is
    non-zero and the offset is not 0.  Lines whose start or end cannot be
    read are skipped and summarised on standard output.
    """
    # Parsing Command Line here
    options, args = doc_optparse.parse(__doc__)

    try:
        # Only the start and end columns are used below.
        # NOTE(review): presumably parse_cols_arg returns zero-based list
        # indexes -- confirm against galaxyops.
        _chrom_col, start_col, end_col, _strand_col = parse_cols_arg(options.cols)
        inp_file, winsize, out_file, makesliding, offset = args
        winsize = int(winsize)
        offset = int(offset)
        makesliding = int(makesliding)
    except Exception:
        stop_err("Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset.")

    # An offset of 0 cannot slide, so fall back to adjacent windows.
    if offset == 0:
        makesliding = 0
    if makesliding == 0:
        step = winsize
    else:
        step = offset

    # A non-positive size or step cannot produce meaningful windows (and a
    # zero step would divide by zero), so reject it before touching any file.
    if winsize <= 0 or step <= 0:
        stop_err("Window size and offset must be positive integers.")

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None

    # Context managers close both files even if reading or writing fails.
    with open(out_file, 'w') as fo:
        with open(inp_file) as fi:
            for i, line in enumerate(fi):
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                try:
                    elems = line.split('\t')
                    start = int(elems[start_col])
                    end = int(elems[end_col])
                except (ValueError, IndexError):
                    # Missing column or non-integer coordinate: remember the
                    # first offending line for the summary and move on.
                    skipped_lines += 1
                    if invalid_line is None:
                        first_invalid_line = i + 1
                        invalid_line = line
                    continue
                for win_start, win_end in _iter_windows(start, end, winsize, step):
                    elems[start_col] = str(win_start)
                    elems[end_col] = str(win_end)
                    fo.write("%s\n" % '\t'.join(elems))

    # Single-argument print() calls behave the same on Python 2 and 3.
    if makesliding == 1:
        print('Window size=%d, Sliding=Yes, Offset=%d' % (winsize, offset))
    else:
        print('Window size=%d, Sliding=No' % (winsize))
    if skipped_lines > 0:
        print('Skipped %d invalid lines starting with #%d: "%s"' % (skipped_lines, first_invalid_line, invalid_line))
|
|
81
|
|
82
|
|
# Script entry point: run the tool only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|