Mercurial > repos > devteam > sam2interval
annotate sam2interval.py @ 1:75557c0908a9 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/sam2interval commit 206cd8245e7619b0e924c5066d0172129222993d"
author | devteam |
---|---|
date | Wed, 05 Feb 2020 06:58:53 -0500 |
parents | 8c737b8ddc45 |
children |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2 | |
3 import sys | |
4 import optparse | |
5 import re | |
6 | |
1
75557c0908a9
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/sam2interval commit 206cd8245e7619b0e924c5066d0172129222993d"
devteam
parents:
0
diff
changeset
|
7 |
75557c0908a9
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/sam2interval commit 206cd8245e7619b0e924c5066d0172129222993d"
devteam
parents:
0
diff
changeset
|
def stop_err(msg):
    """Abort the program, reporting *msg* as the exit status/message.

    Raising ``SystemExit`` directly is what ``sys.exit`` does internally:
    a string argument is written to stderr and the process exits with
    status 1.
    """
    raise SystemExit(msg)
75557c0908a9
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/sam2interval commit 206cd8245e7619b0e924c5066d0172129222993d"
devteam
parents:
0
diff
changeset
|
10 |
0 | 11 |
# CIGAR operations that advance along the reference sequence (SAM spec):
# M (alignment match), D (deletion), N (skipped region), = (sequence match)
# and X (sequence mismatch).  I, S, H and P do not consume reference bases.
_REF_CONSUMING_OP = re.compile(r'(\d+)[MDN=X]')


def _reference_span(cigar_string):
    """Return the number of reference bases covered by *cigar_string*."""
    return sum(int(length) for length in _REF_CONSUMING_OP.findall(cigar_string))


def main():
    """Convert SAM alignments to a tab-separated interval listing.

    Reads SAM records from the file given with ``-f`` (or from STDIN when the
    option is omitted) and prints one line per mapped read:
    ``chrom, start, end, strand, read_name``.  ``start`` is the 0-based
    position (SAM POS minus one) and ``end`` is ``start`` plus the number of
    reference bases consumed by the CIGAR string.  With ``-p`` the complete
    original SAM line is printed in place of the read name.

    Header lines (starting with ``@`` or ``#``), empty lines and records whose
    reference name is ``*`` (unmapped reads) are skipped.

    Raises:
        SystemExit: via ``optparse`` on bad command-line options, including
            non-integer column numbers.
        ValueError / IndexError: if a record has a non-numeric position or
            flag, or fewer columns than the configured column numbers.
    """
    usage = """%prog [options]

options (listed below) default to 'None' if omitted
    """
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-f', '--input_sam_file',
        metavar="INPUT_SAM_FILE",
        dest='input_sam',
        default=False,
        help='Name of the SAM file to be filtered. STDIN is default')

    parser.add_option(
        '-c', '--flag_column',
        dest='flag_col',
        default='2',
        help='Column containing SAM bitwise flag. 1-based')

    parser.add_option(
        '-s', '--start_column',
        dest='start_col',
        default='4',
        help='Column containing position. 1-based')

    parser.add_option(
        '-g', '--cigar_column',
        dest='cigar_col',
        default='6',
        help='Column containing CIGAR or extended CIGAR string')

    parser.add_option(
        '-r', '--ref_column',
        dest='ref_col',
        default='3',
        help='Column containing name of the reference sequence coordinate. 1-based')

    parser.add_option(
        '-e', '--read_column',
        dest='read_col',
        default='1',
        help='Column containing read name. 1-based')

    parser.add_option(
        '-p', '--print_all',
        dest='prt_all',
        action='store_true',
        default=False,
        help='Print coordinates and original SAM?')

    options, _ = parser.parse_args()

    # Convert the 1-based column options to 0-based indices once, up front,
    # instead of re-parsing them for every input line.
    try:
        flag_idx, start_idx, cigar_idx, ref_idx, read_idx = [
            int(column) - 1
            for column in (options.flag_col, options.start_col,
                           options.cigar_col, options.ref_col,
                           options.read_col)
        ]
    except ValueError:
        parser.error('column options must be 1-based integers')

    infile = open(options.input_sam, 'r') if options.input_sam else sys.stdin
    try:
        # provide a (partial) header so that strand is automatically set in metadata
        print('#chrom\tstart\tend\tstrand\tread_name')

        for line in infile:
            line = line.rstrip('\r\n')
            if not line or line.startswith(('#', '@')):
                continue

            fields = line.split('\t')
            ref_name = fields[ref_idx]
            if ref_name == '*':
                # Do not print lines with unmapped reads that contain '*'
                # instead of chromosome name
                continue

            start = int(fields[start_idx]) - 1
            end = start + _reference_span(fields[cigar_idx])
            # Bit 0x10 of the SAM flag marks a read mapped to the reverse strand.
            strand = '-' if int(fields[flag_idx]) & 0x0010 else '+'
            last_column = line if options.prt_all else fields[read_idx]

            print('%s\t%s\t%s\t%s\t%s' % (ref_name, start, end, strand, last_column))
    finally:
        # Never close STDIN; only release a file this function opened itself.
        if infile is not sys.stdin:
            infile.close()
75557c0908a9
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/sam2interval commit 206cd8245e7619b0e924c5066d0172129222993d"
devteam
parents:
0
diff
changeset
|
95 |
0 | 96 |
1
75557c0908a9
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/sam2interval commit 206cd8245e7619b0e924c5066d0172129222993d"
devteam
parents:
0
diff
changeset
|
# Run the converter only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()