Mercurial > repos > galaxyp > bed_to_protein_map
diff bed_to_protein_map.py @ 0:024ed7b0ad93 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/bed_to_protein_map commit 2d39f681f77eedc840c17aebe4ddc8f66c8a7c62-dirty
author | galaxyp |
---|---|
date | Thu, 04 Jan 2018 16:29:38 -0500 |
parents | |
children | a7c58b43cbaa |
line wrap: on
line diff
#!/usr/bin/env python
"""
#
#------------------------------------------------------------------------------
#                         University of Minnesota
#         Copyright 2017, Regents of the University of Minnesota
#------------------------------------------------------------------------------
# Author:
#
#  James E Johnson
#
#------------------------------------------------------------------------------

Convert a 12-column BED file into a protein (CDS) coordinate map.

For every BED record, one tab-separated row is written per exon block that
overlaps the coding region (thickStart..thickEnd):

    name, chrom, start, end, strand, cds_start, cds_end

``start``/``end`` are the genomic bounds of the coding part of the block and
``cds_start``/``cds_end`` are the matching cumulative offsets within the
coding sequence, counted in transcription order (blocks are walked in reverse
for the '-' strand).
"""

import argparse
import sys


def _warn(message):
    """Write *message* and a newline to stderr (same bytes as ``print >>``)."""
    sys.stderr.write(message + '\n')


def _parse_int_list(text):
    """Parse a BED comma-separated integer list into a list of ints.

    A single trailing comma (``"30,40,"``), which many BED producers emit, is
    tolerated.  An empty field yields an empty list.  Any other non-integer
    item still raises ``ValueError``.
    """
    text = text.strip().rstrip(',')
    if not text:
        return []
    return [int(item) for item in text.split(',')]


def _cds_segments(chrom_start, thick_start, thick_end, strand,
                  block_sizes, block_starts):
    """Yield ``(start, end, cds_start, cds_end)`` for each coding block part.

    Blocks are visited in transcription order: ascending for '+', descending
    for '-'.  Any other strand value yields nothing.  Coordinates are treated
    as half-open intervals, so a block that only touches the coding region
    boundary contributes no segment.
    """
    if strand not in ('+', '-'):
        return
    blocks = [(chrom_start + offset, chrom_start + offset + size)
              for offset, size in zip(block_starts, block_sizes)]
    if strand == '-':
        blocks.reverse()
    cds_start = 0
    for start, end in blocks:
        # Half-open intervals: no overlap when the block ends at or before
        # the coding start, or begins at or after the coding end.
        if end <= thick_start or start >= thick_end:
            continue
        # Clip the block to the coding region.
        start = max(start, thick_start)
        end = min(end, thick_end)
        cds_end = cds_start + (end - start)
        yield start, end, cds_start, cds_end
        cds_start = cds_end


def __main__():
    """Command-line entry point.

    Reads the BED file named by the ``input`` argument ('-' for stdin) and
    writes the mapping to ``output`` ('-' for stdout).  Lines starting with
    '#' and blank lines are skipped; lines with fewer than 12 tab-separated
    fields are echoed to stderr and skipped.  Any other error is reported on
    stderr as ``failed: <reason>`` and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(
        description='Convert BED file to protein mapping')
    parser.add_argument(
        'input',
        help='A BED file (12 column)')
    parser.add_argument(
        'output',
        help='Output file (-) for stdout')
    parser.add_argument('-d', '--debug', action='store_true', help='Debug')
    args = parser.parse_args()

    input_rdr = None
    output_wtr = None
    try:
        input_rdr = open(args.input, 'r') if args.input != '-' else sys.stdin
        output_wtr = (open(args.output, 'w') if args.output != '-'
                      else sys.stdout)
        for linenum, line in enumerate(input_rdr):
            if args.debug:
                _warn("%d: %s\n" % (linenum, line))
            if line.startswith('#'):
                continue
            if line.strip() == '':
                continue
            fields = line.rstrip('\r\n').split('\t')
            if len(fields) < 12:
                # Not a BED12 record: report it and carry on.
                _warn("%d: %s\n" % (linenum, line))
                continue
            chrom = fields[0]
            name = fields[3]
            strand = fields[5]
            chrom_start = int(fields[1])
            thick_start = int(fields[6])
            thick_end = int(fields[7])
            block_count = int(fields[9])
            block_sizes = _parse_int_list(fields[10])
            block_starts = _parse_int_list(fields[11])
            if block_count > min(len(block_sizes), len(block_starts)):
                raise ValueError(
                    "line %d: blockCount %d exceeds the %d blockSizes / "
                    "%d blockStarts provided"
                    % (linenum, block_count,
                       len(block_sizes), len(block_starts)))
            segments = _cds_segments(
                chrom_start, thick_start, thick_end, strand,
                block_sizes[:block_count], block_starts[:block_count])
            for start, end, cds_start, cds_end in segments:
                output_wtr.write('%s\t%s\t%d\t%d\t%s\t%d\t%d\n'
                                 % (name, chrom, start, end,
                                    strand, cds_start, cds_end))
    except Exception as e:
        _warn("failed: %s" % e)
        sys.exit(1)
    finally:
        # Close only the streams this function opened; leave stdin/stdout.
        if input_rdr is not None and input_rdr is not sys.stdin:
            input_rdr.close()
        if output_wtr is not None and output_wtr is not sys.stdout:
            output_wtr.close()


if __name__ == "__main__":
    __main__()