0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 """
|
|
4 Split into windows.
|
|
5
|
|
6 usage: %prog input size out_file makesliding offset
|
|
7 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
|
|
8 """
|
|
9
|
|
10 import sys, re, os
|
|
11
|
|
12 from galaxy import eggs
|
|
13 import pkg_resources; pkg_resources.require( "bx-python" )
|
|
14 from bx.cookbook import doc_optparse
|
|
15 from galaxy.tools.util.galaxyops import *
|
|
16
|
|
def stop_err( msg ):
    """
    Report a fatal error and terminate the script.

    Writes `msg` unchanged to standard error, then exits with status 1 so
    that the failure is visible through the exit code as well as through
    the stderr output.

    msg -- text to write to standard error (no newline is appended).
    """
    sys.stderr.write( msg )
    # A bare sys.exit() would exit with status 0, i.e. report success.
    sys.exit( 1 )
|
|
20
|
|
def _write_windows( lines, fo, start_col, end_col, strand_col, winsize, step ):
    """
    Write the windows of every interval found in `lines` to `fo`.

    lines      -- iterable of text lines (e.g. an open file)
    fo         -- writable file-like object receiving the windows
    start_col  -- index of the start field in a tab-split line
    end_col    -- index of the end field in a tab-split line
    strand_col -- index of the strand field; -1 means "no strand column"
    winsize    -- length of each emitted window
    step       -- distance between the starts of consecutive windows

    Blank lines and lines beginning with "#" are ignored.  A line whose
    fields cannot be read is skipped and counted rather than aborting the
    run.

    Returns the tuple ( skipped_lines, first_invalid_line, invalid_line ),
    where first_invalid_line is the 1-based number of the first skipped
    line (0 if none) and invalid_line is its stripped text (None if none).
    """
    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None

    for i, line in enumerate( lines ):
        line = line.strip()
        if not line or line.startswith( "#" ):
            continue

        # Only field parsing is guarded, so that I/O errors raised while
        # writing are not silently miscounted as invalid input lines.
        try:
            elems = line.split( '\t' )
            if strand_col != -1:
                # The strand value itself is not used; the lookup only
                # rejects lines that lack the declared strand column.
                elems[strand_col]
            start = int( elems[start_col] )
            end = int( elems[end_col] )
            # A zero step raises here, so every data line is then counted
            # as skipped.
            numwin = ( end - start ) // step
        except ( ValueError, IndexError, ZeroDivisionError ):
            skipped_lines += 1
            if invalid_line is None:
                first_invalid_line = i + 1
                invalid_line = line
            continue

        # Emit at most `numwin` windows, and check the fit BEFORE writing so
        # that no window ever extends past the end of the interval.
        written = 0
        while written < numwin and start + winsize <= end:
            elems[start_col] = str( start )
            elems[end_col] = str( start + winsize )
            fo.write( "%s\n" % '\t'.join( elems ) )
            start += step
            written += 1

    return skipped_lines, first_invalid_line, invalid_line


def main():
    """
    Split each interval of the input file into windows.

    Positional command-line arguments, in order: input file, window size,
    output file, sliding flag, offset.  The --cols option supplies the
    chrom, start, end and strand columns.

    With the sliding flag at 0 (or an offset of 0) consecutive windows start
    one window size apart; otherwise they start `offset` apart.  A one-line
    summary, plus a note about skipped lines if there were any, is printed
    to standard output.
    """
    # Parsing Command Line here
    options, args = doc_optparse.parse( __doc__ )

    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
        inp_file, winsize, out_file, makesliding, offset = args
        winsize = int( winsize )
        offset = int( offset )
        makesliding = int( makesliding )
    except Exception:
        stop_err( "Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset." )

    # A zero offset cannot slide, so fall back to adjacent windows.
    if offset == 0:
        makesliding = 0

    if makesliding == 0:
        step = winsize
    else:
        step = offset

    # try/finally guarantees both files are closed even if processing fails.
    fo = open( out_file, 'w' )
    try:
        fi = open( inp_file )
        try:
            skipped_lines, first_invalid_line, invalid_line = _write_windows(
                fi, fo, start_col_1, end_col_1, strand_col_1, winsize, step )
        finally:
            fi.close()
    finally:
        fo.close()

    if makesliding == 1:
        print( 'Window size=%d, Sliding=Yes, Offset=%d' % ( winsize, offset ) )
    else:
        print( 'Window size=%d, Sliding=No' % winsize )
    if skipped_lines > 0:
        print( 'Skipped %d invalid lines starting with #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) )
|
|
83
|
|
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|