Mercurial > repos > devteam > sam2interval
annotate sam2interval.py @ 1:75557c0908a9 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/sam2interval commit 206cd8245e7619b0e924c5066d0172129222993d"
| author | devteam |
|---|---|
| date | Wed, 05 Feb 2020 06:58:53 -0500 |
| parents | 8c737b8ddc45 |
| children |
| rev | line source |
|---|---|
| 0 | 1 #!/usr/bin/env python |
| 2 | |
| 3 import sys | |
| 4 import optparse | |
| 5 import re | |
| 6 | |
|
1
75557c0908a9
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/sam2interval commit 206cd8245e7619b0e924c5066d0172129222993d"
devteam
parents:
0
diff
changeset
|
7 |
|
75557c0908a9
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/sam2interval commit 206cd8245e7619b0e924c5066d0172129222993d"
devteam
parents:
0
diff
changeset
|
def stop_err(msg):
    """Abort the program, handing *msg* to the interpreter's exit machinery.

    A string message is written to stderr by the interpreter and the process
    exits with a failure status.
    """
    # sys.exit(msg) is defined as raising SystemExit(msg); raise it directly.
    raise SystemExit(msg)
|
75557c0908a9
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/sam2interval commit 206cd8245e7619b0e924c5066d0172129222993d"
devteam
parents:
0
diff
changeset
|
10 |
| 0 | 11 |
# One CIGAR operation that contributes to the interval length on the
# reference: alignment match (M), deletion (D), skipped region (N) and the
# extended-CIGAR sequence match (=) / mismatch (X).
# NOTE(review): 'P' (padding) is kept only for backward compatibility with
# the previous output; the SAM specification lists it as consuming neither
# query nor reference -- confirm whether it should be dropped.
_REF_SPAN_OP = re.compile(r'(\d+)[MDN=XP]')


def _reference_span(cigar_string):
    """Return the summed length of the reference-spanning ops in *cigar_string*."""
    return sum(int(length) for length in _REF_SPAN_OP.findall(cigar_string))


def main():
    """Convert SAM records to interval lines written to stdout.

    Command-line options select the input file (stdin when ``-f`` is not
    given) and the 1-based columns holding the flag, start position, CIGAR
    string, reference name and read name.  For every non-empty line that does
    not start with ``#`` or ``@`` and whose reference name is not ``*``, one
    tab-separated line is printed::

        chrom  start  end  strand  read_name

    ``start`` is the SAM position minus one (0-based), ``end`` is ``start``
    plus the reference span derived from the CIGAR string, and ``strand`` is
    ``-`` when flag bit 0x10 is set, ``+`` otherwise.  With ``-p`` the whole
    original SAM line is printed in place of the read name.

    Raises:
        ValueError: if a column option or a flag/position field is not an
            integer.
        IndexError: if a record has fewer fields than a selected column.
    """
    usage = """%prog [options]

options (listed below) default to 'None' if omitted
    """
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-f', '--input_sam_file',
        metavar="INPUT_SAM_FILE",
        dest='input_sam',
        default=False,
        help='Name of the SAM file to be filtered. STDIN is default')

    parser.add_option(
        '-c', '--flag_column',
        dest='flag_col',
        default='2',
        help='Column containing SAM bitwise flag. 1-based')

    parser.add_option(
        '-s', '--start_column',
        dest='start_col',
        default='4',
        help='Column containing position. 1-based')

    parser.add_option(
        '-g', '--cigar_column',
        dest='cigar_col',
        default='6',
        help='Column containing CIGAR or extended CIGAR string')

    parser.add_option(
        '-r', '--ref_column',
        dest='ref_col',
        default='3',
        help='Column containing name of the reference sequence coordinate. 1-based')

    parser.add_option(
        '-e', '--read_column',
        dest='read_col',
        default='1',
        help='Column containing read name. 1-based')

    parser.add_option(
        '-p', '--print_all',
        dest='prt_all',
        action='store_true',
        default=False,
        help='Print coordinates and original SAM?')

    options, args = parser.parse_args()

    # Column options are 1-based strings; convert them to 0-based indices
    # once instead of on every input line.
    flag_idx = int(options.flag_col) - 1
    start_idx = int(options.start_col) - 1
    cigar_idx = int(options.cigar_col) - 1
    ref_idx = int(options.ref_col) - 1
    read_idx = int(options.read_col) - 1

    if options.input_sam:
        infile = open(options.input_sam, 'r')
    else:
        infile = sys.stdin

    try:
        print('#chrom\tstart\tend\tstrand\tread_name')  # provide a (partial) header so that strand is automatically set in metadata

        for line in infile:
            line = line.rstrip('\r\n')
            # Skip blank lines, comment lines and SAM header lines.
            if not line or line.startswith('#') or line.startswith('@'):
                continue

            fields = line.split('\t')
            start = int(fields[start_idx]) - 1  # SAM positions are 1-based
            end = start + _reference_span(fields[cigar_idx])

            strand = '+'
            if int(fields[flag_idx]) & 0x0010:
                strand = '-'
            read_name = fields[read_idx]
            ref_name = fields[ref_idx]

            # Do not print lines with unmapped reads that contain '*' instead of chromosome name
            if ref_name == '*':
                continue

            last_column = line if options.prt_all else read_name
            print('%s\t%s\t%s\t%s\t%s' % (ref_name, str(start), str(end), strand, last_column))
    finally:
        # Only close what this function opened; leave stdin alone.
        if infile is not sys.stdin:
            infile.close()
|
75557c0908a9
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/sam2interval commit 206cd8245e7619b0e924c5066d0172129222993d"
devteam
parents:
0
diff
changeset
|
95 |
| 0 | 96 |
|
1
75557c0908a9
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/sam2interval commit 206cd8245e7619b0e924c5066d0172129222993d"
devteam
parents:
0
diff
changeset
|
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
