Mercurial > repos > devteam > windowsplitter
comparison windowSplitter.py @ 3:d8515fe22ac8 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
author | devteam |
---|---|
date | Wed, 03 Jun 2020 09:45:06 -0400 |
parents | e5490ea33640 |
children | (none) |
comparison legend: equal, deleted, inserted, replaced
2:592089499ae7 | 3:d8515fe22ac8 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | |
3 """ | 2 """ |
4 Split into windows. | 3 Split into windows. |
5 | 4 |
6 usage: %prog input size out_file | 5 usage: %prog input size out_file |
7 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file | 6 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file |
8 """ | 7 """ |
8 from __future__ import print_function | |
9 | 9 |
10 import sys | 10 import sys |
11 | 11 |
12 from galaxy import eggs | |
13 import pkg_resources | |
14 pkg_resources.require( "bx-python" ) | |
15 from bx.cookbook import doc_optparse | 12 from bx.cookbook import doc_optparse |
16 from galaxy.tools.util.galaxyops import * | |
17 | 13 |
18 def stop_err( msg ): | 14 |
19 sys.stderr.write( msg ) | 15 # Default chrom, start, end, strand cols for a bed file |
20 sys.exit() | 16 BED_DEFAULT_COLS = 0, 1, 2, 5 |
17 | |
18 | |
19 def parse_cols_arg(cols): | |
20 """Parse a columns command line argument into a four-tuple""" | |
21 if cols: | |
22 # Handle case where no strand column included - in this case, cols | |
23 # looks something like 1,2,3, | |
24 if cols.endswith(','): | |
25 cols += '0' | |
26 col_list = [int(x) - 1 for x in cols.split(",")] | |
27 return col_list | |
28 else: | |
29 return BED_DEFAULT_COLS | |
21 | 30 |
22 | 31 |
23 def main(): | 32 def main(): |
24 # Parsing Command Line here | 33 # Parsing Command Line here |
25 options, args = doc_optparse.parse( __doc__ ) | 34 options, args = doc_optparse.parse(__doc__) |
26 | 35 |
27 try: | 36 try: |
28 chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols ) | 37 chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(options.cols) |
29 inp_file, winsize, out_file, makesliding, offset = args | 38 inp_file, winsize, out_file, makesliding, offset = args |
30 winsize = int(winsize) | 39 winsize = int(winsize) |
31 offset = int(offset) | 40 offset = int(offset) |
32 makesliding = int(makesliding) | 41 makesliding = int(makesliding) |
33 except: | 42 except Exception: |
34 stop_err( "Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset." ) | 43 sys.exit("Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset.") |
35 | |
36 fo = open(out_file,'w') | |
37 | 44 |
38 skipped_lines = 0 | 45 skipped_lines = 0 |
39 first_invalid_line = 0 | 46 first_invalid_line = 0 |
40 invalid_line = None | 47 invalid_line = None |
41 if offset == 0: | 48 if offset == 0: |
42 makesliding = 0 | 49 makesliding = 0 |
43 | 50 |
44 for i, line in enumerate( file( inp_file ) ): | 51 with open(out_file, 'w') as fo, open(inp_file) as fi: |
45 line = line.strip() | 52 for i, line in enumerate(fi): |
46 if line and line[0:1] != "#": | 53 line = line.strip() |
47 try: | 54 if line and line[0:1] != "#": |
48 elems = line.split('\t') | 55 try: |
49 start = int(elems[start_col_1]) | 56 elems = line.split('\t') |
50 end = int(elems[end_col_1]) | 57 start = int(elems[start_col_1]) |
51 if makesliding == 0: | 58 end = int(elems[end_col_1]) |
52 numwin = (end - start)/winsize | 59 if makesliding == 0: |
53 else: | 60 numwin = (end - start) // winsize |
54 numwin = (end - start)/offset | 61 else: |
55 if numwin > 0: | 62 numwin = (end - start) // offset |
56 for win in range(numwin): | 63 if numwin > 0: |
57 elems_1 = elems | 64 for _ in range(numwin): |
58 elems_1[start_col_1] = str(start) | 65 elems_1 = elems |
59 elems_1[end_col_1] = str(start + winsize) | 66 elems_1[start_col_1] = str(start) |
60 fo.write( "%s\n" % '\t'.join( elems_1 ) ) | 67 elems_1[end_col_1] = str(start + winsize) |
61 if makesliding == 0: | 68 fo.write("%s\n" % '\t'.join(elems_1)) |
62 start = start + winsize | 69 if makesliding == 0: |
63 else: | 70 start = start + winsize |
64 start = start + offset | 71 else: |
65 if start+winsize > end: | 72 start = start + offset |
66 break | 73 if start + winsize > end: |
67 except: | 74 break |
68 skipped_lines += 1 | 75 except Exception: |
69 if not invalid_line: | 76 skipped_lines += 1 |
70 first_invalid_line = i + 1 | 77 if not invalid_line: |
71 invalid_line = line | 78 first_invalid_line = i + 1 |
72 | 79 invalid_line = line |
73 fo.close() | |
74 | 80 |
75 if makesliding == 1: | 81 if makesliding == 1: |
76 print 'Window size=%d, Sliding=Yes, Offset=%d' % ( winsize, offset ) | 82 print('Window size=%d, Sliding=Yes, Offset=%d' % (winsize, offset)) |
77 else: | 83 else: |
78 print 'Window size=%d, Sliding=No' % (winsize) | 84 print('Window size=%d, Sliding=No' % (winsize)) |
79 if skipped_lines > 0: | 85 if skipped_lines > 0: |
80 print 'Skipped %d invalid lines starting with #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) | 86 print('Skipped %d invalid lines starting with #%d: "%s"' % (skipped_lines, first_invalid_line, invalid_line)) |
81 | 87 |
82 | 88 |
83 if __name__ == "__main__": | 89 if __name__ == "__main__": |
84 main() | 90 main() |