annotate windowSplitter.py @ 0:e5490ea33640

Imported from capsule None
author devteam
date Tue, 01 Apr 2014 10:50:03 -0400
parents
children d8515fe22ac8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
2
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
3 """
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
4 Split into windows.
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
5
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
6 usage: %prog input size out_file
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
7 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
8 """
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
9
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
10 import sys
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
11
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
12 from galaxy import eggs
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
13 import pkg_resources
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
14 pkg_resources.require( "bx-python" )
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
15 from bx.cookbook import doc_optparse
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
16 from galaxy.tools.util.galaxyops import *
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
17
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
def stop_err( msg ):
    """
    Report a fatal error and terminate the script.

    msg is written to standard error exactly as given (no newline is
    appended), then the interpreter exits via SystemExit.

    The exit status is 1 so that callers and shells see the run as failed;
    a bare sys.exit() would report status 0 (success) even though an error
    message was just emitted.
    """
    sys.stderr.write( msg )
    sys.exit( 1 )
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
21
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
22
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
def main():
    """
    Split every interval of a tab-separated input file into fixed-size windows.

    Positional command-line arguments (in order): input file, window size,
    output file, sliding flag (integer, 0 = non-overlapping windows) and
    offset (step between window starts when sliding).  The chrom/start/end/
    strand column indexes come from the --cols option via parse_cols_arg.

    For each non-blank, non-comment ("#") input line, one output line is
    written per window: a copy of the input line whose start and end columns
    are replaced by the window's start and start + window size.  Only windows
    that fit inside the interval are written.  Lines whose coordinates cannot
    be read are skipped and summarised on standard output at the end.

    Calls stop_err (which terminates the script) when the arguments cannot
    be parsed.
    """
    # Parsing Command Line here
    options, args = doc_optparse.parse( __doc__ )

    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
        inp_file, winsize, out_file, makesliding, offset = args
        winsize = int( winsize )
        offset = int( offset )
        makesliding = int( makesliding )
    except Exception:
        # Exception (not a bare except) so SystemExit/KeyboardInterrupt propagate.
        stop_err( "Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset." )

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None

    # A zero offset cannot advance a sliding window; fall back to plain tiling.
    if offset == 0:
        makesliding = 0

    # Distance between consecutive window starts.
    step = winsize if makesliding == 0 else offset

    # Nested "with" blocks close both files even when an error occurs.
    with open( out_file, 'w' ) as fo:
        with open( inp_file ) as fi:
            for i, line in enumerate( fi ):
                line = line.strip()
                if not line or line[0:1] == "#":
                    continue

                # Only the parsing is guarded: a failure here means the line
                # is malformed.  Write errors below must propagate instead of
                # being miscounted as invalid input.
                try:
                    elems = line.split( '\t' )
                    start = int( elems[start_col_1] )
                    end = int( elems[end_col_1] )
                    # Floor division keeps numwin an integer on Python 2 and 3.
                    # A zero window size raises ZeroDivisionError, so the line
                    # is reported as skipped, as before.
                    numwin = ( end - start ) // step
                except ( ValueError, IndexError, TypeError, ZeroDivisionError ):
                    skipped_lines += 1
                    if not invalid_line:
                        first_invalid_line = i + 1
                        invalid_line = line
                    continue

                # range() of a non-positive count is empty, so intervals too
                # short for a single step produce no output.
                for _ in range( numwin ):
                    # Checked before writing so that even the first window
                    # never extends past the end of the interval.
                    if start + winsize > end:
                        break
                    elems[start_col_1] = str( start )
                    elems[end_col_1] = str( start + winsize )
                    fo.write( "%s\n" % '\t'.join( elems ) )
                    start += step

    # Single parenthesised argument: same output under Python 2's print
    # statement and Python 3's print function.
    if makesliding == 1:
        print( 'Window size=%d, Sliding=Yes, Offset=%d' % ( winsize, offset ) )
    else:
        print( 'Window size=%d, Sliding=No' % ( winsize ) )
    if skipped_lines > 0:
        print( 'Skipped %d invalid lines starting with #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) )
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
81
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
82
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()