comparison windowSplitter.py @ 0:e5490ea33640

Imported from capsule None
author devteam
date Tue, 01 Apr 2014 10:50:03 -0400
parents
children d8515fe22ac8
comparison
equal deleted inserted replaced
-1:000000000000 0:e5490ea33640
1 #!/usr/bin/env python
2
3 """
4 Split into windows.
5
6 usage: %prog input size out_file
7 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
8 """
9
10 import sys
11
12 from galaxy import eggs
13 import pkg_resources
14 pkg_resources.require( "bx-python" )
15 from bx.cookbook import doc_optparse
16 from galaxy.tools.util.galaxyops import *
17
def stop_err( msg ):
    """Report a fatal error and terminate the script.

    Writes ``msg`` verbatim to standard error (no newline is appended) and
    raises ``SystemExit`` with exit status 1, so that the calling process can
    tell the run failed from the exit status as well as from the stderr text.

    msg -- the error text to emit.
    """
    sys.stderr.write( msg )
    # A bare sys.exit() would report status 0 (success) even though we are
    # aborting on an error; exit with a non-zero status instead.
    sys.exit( 1 )
21
22
def _window_bounds( start, end, winsize, step ):
    """Return the (start, end) pairs of the windows cut from one interval.

    Windows are ``winsize`` wide and begin at ``start``, ``start + step``,
    ``start + 2 * step`` and so on.  At most ``(end - start) // step`` windows
    are produced, and a window is only produced if it lies entirely inside
    the interval (``window_end <= end``).

    ``step`` must be non-zero; the caller guarantees this.
    """
    windows = []
    # Floor division keeps the Python 2 integer semantics of the original
    # "/" and also works on Python 3.  A negative count yields no windows.
    numwin = ( end - start ) // step
    for _ in range( numwin ):
        # Check the bound BEFORE emitting: in sliding mode the interval can
        # be shorter than one window even though numwin > 0.
        if start + winsize > end:
            break
        windows.append( ( start, start + winsize ) )
        start += step
    return windows


def main():
    """Split every interval of a tab-separated file into fixed-size windows.

    Positional command-line arguments (five are unpacked, although the module
    usage string lists only the first three):
        inp_file     -- path of the tab-separated interval file to read
        winsize      -- window width (integer, must be > 0)
        out_file     -- path of the file to write the windows to
        makesliding  -- integer flag; non-zero selects sliding windows
        offset       -- stride between sliding windows (integer); an offset
                        of 0 forces non-sliding mode
    Option:
        -l/--cols    -- column spec handed to parse_cols_arg; only the start
                        and end column indices are used here

    For each non-blank line that does not start with "#", the start and end
    columns are read as integers and one output line is written per window,
    identical to the input line except for the start/end columns.  Lines
    whose start/end columns are missing or not integers are counted and
    reported in the summary printed to standard output.
    """
    # Parsing Command Line here
    options, args = doc_optparse.parse( __doc__ )

    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
        inp_file, winsize, out_file, makesliding, offset = args
        winsize = int( winsize )
        offset = int( offset )
        makesliding = int( makesliding )
    except Exception:
        # "except Exception" rather than a bare except, so SystemExit and
        # KeyboardInterrupt are not converted into this metadata message.
        stop_err( "Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset." )

    if winsize <= 0:
        # Previously a zero window size made every line fail with a
        # ZeroDivisionError that was misreported as an "invalid line".
        stop_err( "Window size must be a positive integer." )

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None
    if offset == 0:
        makesliding = 0

    # Non-sliding windows are laid end to end; sliding windows advance by offset.
    if makesliding == 0:
        step = winsize
    else:
        step = offset

    # Context managers close both files even if an error interrupts the run.
    with open( out_file, 'w' ) as fo:
        with open( inp_file ) as fi:
            for i, line in enumerate( fi ):
                line = line.strip()
                if not line or line.startswith( "#" ):
                    continue
                elems = line.split( '\t' )
                try:
                    start = int( elems[start_col_1] )
                    end = int( elems[end_col_1] )
                except ( ValueError, IndexError ):
                    # Only genuinely malformed lines are counted as skipped;
                    # I/O errors while writing now propagate instead.
                    skipped_lines += 1
                    if invalid_line is None:
                        first_invalid_line = i + 1
                        invalid_line = line
                    continue
                for win_start, win_end in _window_bounds( start, end, winsize, step ):
                    # The same list is reused for every window: only the
                    # start/end columns change and they are overwritten each time.
                    elems[start_col_1] = str( win_start )
                    elems[end_col_1] = str( win_end )
                    fo.write( "%s\n" % '\t'.join( elems ) )

    # Single-argument print(...) produces the same output on Python 2 and 3.
    if makesliding == 1:
        print( 'Window size=%d, Sliding=Yes, Offset=%d' % ( winsize, offset ) )
    else:
        print( 'Window size=%d, Sliding=No' % (winsize) )
    if skipped_lines > 0:
        print( 'Skipped %d invalid lines starting with #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) )
81
82
# Script entry point: run the tool only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()