annotate windowSplitter.py @ 3:d8515fe22ac8 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
author devteam
date Wed, 03 Jun 2020 09:45:06 -0400
parents e5490ea33640
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
2 """
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
3 Split into windows.
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
4
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
5 usage: %prog input size out_file makesliding offset
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
6 -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
7 """
3
d8515fe22ac8 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
devteam
parents: 0
diff changeset
8 from __future__ import print_function
0
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
9
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
10 import sys
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
11
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
12 from bx.cookbook import doc_optparse
3
d8515fe22ac8 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
devteam
parents: 0
diff changeset
13
d8515fe22ac8 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
devteam
parents: 0
diff changeset
14
d8515fe22ac8 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
devteam
parents: 0
diff changeset
# Default chrom, start, end, strand cols for a bed file (0-based indices,
# used when no explicit column spec is supplied on the command line)
BED_DEFAULT_COLS = 0, 1, 2, 5
d8515fe22ac8 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
devteam
parents: 0
diff changeset
17
0
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
18
3
d8515fe22ac8 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/windowsplitter commit 6960b37b09831a1b83f18775677b83a3cc5c2cd0"
devteam
parents: 0
diff changeset
def parse_cols_arg(cols):
    """Turn a 1-based, comma-separated column spec into 0-based indices.

    A missing or empty spec yields ``BED_DEFAULT_COLS``.  Otherwise a list
    with one 0-based index per comma-separated field is returned.
    """
    if not cols:
        return BED_DEFAULT_COLS
    # A spec without a strand column arrives with a trailing comma
    # (e.g. "1,2,3,"); fill the empty slot with column 0, which becomes
    # index -1 after the 1-based -> 0-based shift.
    spec = cols + '0' if cols.endswith(',') else cols
    return [int(field) - 1 for field in spec.split(",")]
0
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
30
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
31
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
def _write_windows(fo, elems, start_col, end_col, start, end, winsize, step):
    """Write every window of ``winsize`` that fits inside [start, end].

    Windows begin at ``start`` and advance by ``step``.  Each output row is a
    copy of ``elems`` with the start/end columns replaced by the window
    coordinates.  At most ``(end - start) // step`` windows are emitted, and a
    window is only written when it lies completely inside the interval.
    """
    row = list(elems)
    for _ in range((end - start) // step):
        # Check the fit *before* writing so that an interval shorter than
        # the window size never produces a window overrunning its end.
        if start + winsize > end:
            break
        row[start_col] = str(start)
        row[end_col] = str(start + winsize)
        fo.write("%s\n" % '\t'.join(row))
        start += step


def main():
    """Split each interval of the input file into fixed-size windows.

    Positional command line arguments: input file, window size, output file,
    sliding flag (0/1) and sliding offset.  The optional ``--cols`` option
    selects the chrom/start/end/strand columns (1-based).

    Blank lines and lines starting with ``#`` are ignored.  Lines whose
    start/end columns are missing or not integers are skipped and summarised
    on stdout.  Exits with an error message when the arguments cannot be
    parsed or the window size / offset are out of range.
    """
    # Parsing Command Line here
    options, args = doc_optparse.parse(__doc__)

    try:
        # Only the start/end columns are used; unpacking four values still
        # verifies that the column spec has the expected shape.
        _chr_col, start_col, end_col, _strand_col = parse_cols_arg(options.cols)
        inp_file, winsize, out_file, makesliding, offset = args
        winsize = int(winsize)
        offset = int(offset)
        makesliding = int(makesliding)
    except Exception:
        sys.exit("Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset.")

    # A zero/negative window or a negative offset can never yield windows and
    # would otherwise surface as every input line being reported "invalid".
    if winsize <= 0 or offset < 0:
        sys.exit("Window size must be a positive integer and offset must not be negative.")

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None
    # Sliding with a zero offset is meaningless; fall back to tiling.
    if offset == 0:
        makesliding = 0
    # Distance between consecutive window starts (loop invariant).
    step = winsize if makesliding == 0 else offset

    with open(out_file, 'w') as fo, open(inp_file) as fi:
        for i, line in enumerate(fi):
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            try:
                elems = line.split('\t')
                start = int(elems[start_col])
                end = int(elems[end_col])
            except (ValueError, IndexError):
                # Malformed data line: remember the first one for the report.
                skipped_lines += 1
                if invalid_line is None:
                    first_invalid_line = i + 1
                    invalid_line = line
                continue
            # Output errors are deliberately NOT caught here so that they are
            # not misreported as invalid input lines.
            _write_windows(fo, elems, start_col, end_col, start, end, winsize, step)

    if makesliding == 1:
        print('Window size=%d, Sliding=Yes, Offset=%d' % (winsize, offset))
    else:
        print('Window size=%d, Sliding=No' % (winsize))
    if skipped_lines > 0:
        print('Skipped %d invalid lines starting with #%d: "%s"' % (skipped_lines, first_invalid_line, invalid_line))
0
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
87
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
88
e5490ea33640 Imported from capsule None
devteam
parents:
diff changeset
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()