diff bed_to_protein_map.py @ 0:024ed7b0ad93 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/bed_to_protein_map commit 2d39f681f77eedc840c17aebe4ddc8f66c8a7c62-dirty
author galaxyp
date Thu, 04 Jan 2018 16:29:38 -0500
parents
children a7c58b43cbaa
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bed_to_protein_map.py	Thu Jan 04 16:29:38 2018 -0500
@@ -0,0 +1,100 @@
+#!/usr/bin/env python
+"""
+#
+#------------------------------------------------------------------------------
+#                         University of Minnesota
+#         Copyright 2017, Regents of the University of Minnesota
+#------------------------------------------------------------------------------
+# Author:
+#
+#  James E Johnson
+#
+#------------------------------------------------------------------------------
+"""
+
+import argparse
+import sys
+
+
+def __main__():
+    parser = argparse.ArgumentParser(
+        description='Convert BED file to protein mapping')
+    parser.add_argument(
+        'input',
+        help='A BED file (12 column)')
+    parser.add_argument(
+        'output',
+        help='Output file  (-) for stdout')
+    parser.add_argument('-d', '--debug', action='store_true', help='Debug')
+    args = parser.parse_args()
+
+    input_rdr = open(args.input, 'r') if args.input != '-' else sys.stdin
+    output_wtr = open(args.output, 'w') if args.output != '-' else sys.stdout
+
+    try:
+        for linenum, line in enumerate(input_rdr):
+            if args.debug:
+                print >> sys.stderr, "%d: %s\n" % (linenum, line)
+            if line.startswith('#'):
+                continue
+            if line.strip() == '':
+                continue
+            fields = line.rstrip('\r\n').split('\t')
+            if len(fields) < 12:
+                print >> sys.stderr, "%d: %s\n" % (linenum, line)
+                continue
+            (chrom, _chromStart, _chromEnd, name, score, strand,
+             _thickStart, _thickEnd, itemRgb,
+             _blockCount, blockSizes, blockStarts) = fields[0:12]
+            chromStart = int(_chromStart)
+            thickStart = int(_thickStart)
+            thickEnd = int(_thickEnd)
+            blockCount = int(_blockCount)
+            blockSizes = [int(x) for x in blockSizes.split(',')]
+            blockStarts = [int(x) for x in blockStarts.split(',')]
+            if strand == '+':
+                cds_start = 0
+                cds_end = 0
+                for i in range(blockCount):
+                    start = chromStart + blockStarts[i]
+                    end = start + blockSizes[i]
+                    if end < thickStart:
+                        continue
+                    if start > thickEnd:
+                        break
+                    if start < thickStart:
+                        start = thickStart
+                    if end > thickEnd:
+                        end = thickEnd
+                    cds_end = cds_start + (end - start)
+                    output_wtr.write('%s\t%s\t%d\t%d\t%s\t%d\t%d\n'
+                                     % (name, chrom, start, end,
+                                        strand, cds_start, cds_end))
+                    cds_start = cds_end
+            elif strand == '-':
+                cds_start = 0
+                cds_end = 0
+                for i in reversed(range(blockCount)):
+                    start = chromStart + blockStarts[i]
+                    end = start + blockSizes[i]
+                    if end < thickStart:
+                        break
+                    if start > thickEnd:
+                        continue
+                    if start < thickStart:
+                        start = thickStart
+                    if end > thickEnd:
+                        end = thickEnd
+                    cds_end = cds_start + (end - start)
+                    output_wtr.write('%s\t%s\t%d\t%d\t%s\t%d\t%d\n'
+                                     % (name, chrom, start, end,
+                                        strand, cds_start, cds_end))
+                    cds_start = cds_end
+                pass
+    except Exception, e:
+        print >> sys.stderr, "failed: %s" % e
+        exit(1)
+
+
+if __name__ == "__main__":
+    __main__()