1 #!/usr/bin/env python
3 """
4 Split into windows.
6 usage: %prog input size out_file
7 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
8 """
10 import sys, re, os
12 from galaxy import eggs
13 import pkg_resources; pkg_resources.require( "bx-python" )
14 from bx.cookbook import doc_optparse
15 from galaxy.tools.util.galaxyops import *
def stop_err( msg ):
    """Report a fatal error and terminate the tool.

    Writes ``msg`` to standard error exactly as given (no newline is
    appended) and exits with status 1, so callers that inspect the exit
    code -- not only stderr -- also see the run as failed.

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write( msg )
    # A bare sys.exit() would report status 0 (success) despite the error.
    sys.exit( 1 )
def _iter_windows( start, end, winsize, step ):
    """Yield (window_start, window_end) for each window inside [start, end].

    Windows are ``winsize`` long and begin at ``start``, ``start + step``,
    ``start + 2 * step`` and so on; only windows that fit entirely within
    the interval are produced.  Both ``winsize`` and ``step`` must be
    positive, otherwise the loop would not terminate.
    """
    while start + winsize <= end:
        yield start, start + winsize
        start += step


def main():
    """Split every interval of a tab-separated file into fixed-size windows.

    Positional command-line arguments (in this order):
    inp_file, winsize, out_file, makesliding, offset.
    The column indices for chrom/start/end/strand come from ``--cols`` via
    ``parse_cols_arg``.

    For each non-empty, non-comment input row, one output row is written
    per window; the row is copied unchanged except for its start and end
    columns.  Windows advance by ``winsize`` (tiling) or, when
    ``makesliding`` is non-zero and ``offset`` is non-zero, by ``offset``
    (sliding).  Rows whose coordinates cannot be read are counted and
    reported in the summary printed to standard output.
    """
    # Parsing Command Line here
    options, args = doc_optparse.parse( __doc__ )

    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
        inp_file, winsize, out_file, makesliding, offset = args
        winsize = int( winsize )
        offset = int( offset )
        makesliding = int( makesliding )
    except Exception:
        stop_err( "Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset." )

    # An offset of 0 cannot slide, so it falls back to plain tiling.
    sliding = makesliding != 0 and offset != 0

    # Reject parameters that would produce no windows, degenerate windows
    # or a loop that never terminates.
    if winsize <= 0:
        stop_err( "Window size must be a positive integer." )
    if sliding and offset < 0:
        stop_err( "Offset must not be negative." )

    if sliding:
        step = offset
    else:
        step = winsize

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None

    with open( out_file, 'w' ) as fo:
        with open( inp_file ) as fi:
            for i, line in enumerate( fi ):
                line = line.strip()
                if not line or line.startswith( '#' ):
                    continue
                elems = line.split( '\t' )
                try:
                    # The strand value itself is not used, but a row that
                    # lacks its declared strand column is still treated as
                    # malformed (-1 is taken to mean "no strand column").
                    if strand_col_1 != -1:
                        elems[strand_col_1]
                    start = int( elems[start_col_1] )
                    end = int( elems[end_col_1] )
                except ( ValueError, IndexError ):
                    skipped_lines += 1
                    if invalid_line is None:
                        first_invalid_line = i + 1
                        invalid_line = line
                    continue
                # elems is rebuilt for every input row, so overwriting the
                # coordinate columns in place for each window is safe.
                for win_start, win_end in _iter_windows( start, end, winsize, step ):
                    elems[start_col_1] = str( win_start )
                    elems[end_col_1] = str( win_end )
                    fo.write( "%s\n" % '\t'.join( elems ) )

    # Single-argument print( ... ) behaves the same under Python 2 and 3.
    if sliding:
        print( 'Window size=%d, Sliding=Yes, Offset=%d' % ( winsize, offset ) )
    else:
        print( 'Window size=%d, Sliding=No' % winsize )
    if skipped_lines > 0:
        print( 'Skipped %d invalid lines starting with #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) )
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()