comparison bed_to_protein_map.py @ 0:024ed7b0ad93 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/bed_to_protein_map commit 2d39f681f77eedc840c17aebe4ddc8f66c8a7c62-dirty
author galaxyp
date Thu, 04 Jan 2018 16:29:38 -0500
parents
children a7c58b43cbaa
comparison
equal deleted inserted replaced
-1:000000000000 0:024ed7b0ad93
1 #!/usr/bin/env python
2 """
3 #
4 #------------------------------------------------------------------------------
5 # University of Minnesota
6 # Copyright 2017, Regents of the University of Minnesota
7 #------------------------------------------------------------------------------
8 # Author:
9 #
10 # James E Johnson
11 #
12 #------------------------------------------------------------------------------
13 """
14
15 import argparse
16 import sys
17
18
def _parse_int_list(text):
    """Parse a comma-separated list of integers, skipping empty items.

    BED files commonly terminate blockSizes/blockStarts with a trailing
    comma (e.g. ``567,488,``); the empty item that produces is ignored
    instead of being passed to ``int()``.
    """
    return [int(item) for item in text.split(',') if item.strip()]


def _cds_segments(chromStart, thickStart, thickEnd, strand,
                  blockCount, blockSizes, blockStarts):
    """Yield (start, end, cds_start, cds_end) for each block in the thick region.

    Blocks are visited in ascending order for strand '+' and in descending
    order for strand '-'; any other strand yields nothing.  ``start``/``end``
    are the block's chromosome coordinates clipped to [thickStart, thickEnd];
    ``cds_start``/``cds_end`` are running offsets that accumulate the clipped
    block lengths in visiting order, starting from 0.
    """
    if strand not in ('+', '-'):
        return
    forward = strand == '+'
    indexes = range(blockCount) if forward else reversed(range(blockCount))
    cds_start = 0
    for i in indexes:
        start = chromStart + blockStarts[i]
        end = start + blockSizes[i]
        # A block lying wholly before the thick region has not been reached
        # yet when walking forward, but ends the walk when going backward.
        if end < thickStart:
            if forward:
                continue
            break
        # Symmetric case for a block lying wholly after the thick region.
        if start > thickEnd:
            if forward:
                break
            continue
        # Clip the block to the thick region.  The comparisons above are
        # strict, so a block that merely touches the region still yields a
        # zero-length segment.
        start = max(start, thickStart)
        end = min(end, thickEnd)
        cds_end = cds_start + (end - start)
        yield start, end, cds_start, cds_end
        cds_start = cds_end


def __main__():
    """Convert a 12-column BED file into a tab-separated protein map.

    Command line: ``input output [-d]``.  ``-`` selects stdin / stdout.

    Lines starting with '#' and blank lines are skipped; lines with fewer
    than 12 tab-separated fields are echoed to stderr and skipped.  For every
    block overlapping the thickStart..thickEnd region one output row is
    written with the columns:
    name, chrom, start, end, strand, cds_start, cds_end.

    Any exception while opening or processing the files is reported on
    stderr as ``failed: <message>`` and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description='Convert BED file to protein mapping')
    parser.add_argument(
        'input',
        help='A BED file (12 column)')
    parser.add_argument(
        'output',
        help='Output file (-) for stdout')
    parser.add_argument('-d', '--debug', action='store_true', help='Debug')
    args = parser.parse_args()

    input_rdr = None
    output_wtr = None
    try:
        input_rdr = open(args.input, 'r') if args.input != '-' else sys.stdin
        output_wtr = (open(args.output, 'w') if args.output != '-'
                      else sys.stdout)
        for linenum, line in enumerate(input_rdr):
            if args.debug:
                # Same bytes the Python 2 print statement emitted: the
                # format's own newline plus the one print appended.
                sys.stderr.write("%d: %s\n\n" % (linenum, line))
            if line.startswith('#'):
                continue
            if line.strip() == '':
                continue
            fields = line.rstrip('\r\n').split('\t')
            if len(fields) < 12:
                # Not a BED12 record: report it and move on.
                sys.stderr.write("%d: %s\n\n" % (linenum, line))
                continue
            chrom = fields[0]
            name = fields[3]
            strand = fields[5]
            chromStart = int(fields[1])
            thickStart = int(fields[6])
            thickEnd = int(fields[7])
            blockCount = int(fields[9])
            blockSizes = _parse_int_list(fields[10])
            blockStarts = _parse_int_list(fields[11])
            segments = _cds_segments(chromStart, thickStart, thickEnd, strand,
                                     blockCount, blockSizes, blockStarts)
            for start, end, cds_start, cds_end in segments:
                output_wtr.write('%s\t%s\t%d\t%d\t%s\t%d\t%d\n'
                                 % (name, chrom, start, end,
                                    strand, cds_start, cds_end))
    except Exception as e:
        sys.stderr.write("failed: %s\n" % e)
        sys.exit(1)
    finally:
        # Close only the handles opened here; never the process's std streams.
        if input_rdr is not None and input_rdr is not sys.stdin:
            input_rdr.close()
        if output_wtr is not None and output_wtr is not sys.stdout:
            output_wtr.close()


if __name__ == "__main__":
    __main__()