Mercurial > repos > galaxyp > gffcompare_to_bed
diff gffcompare_to_bed.py @ 0:7e572e148175 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/gffcompare_to_bed commit 321b217382f6be33bd77c7dbb51c8caf5fa50afe
author | galaxyp |
---|---|
date | Thu, 11 Jan 2018 11:16:51 -0500 |
parents | |
children | 9a4cfc910674 |
line wrap: on
line diff
#!/usr/bin/env python
"""
#
#------------------------------------------------------------------------------
#                         University of Minnesota
#         Copyright 2017, Regents of the University of Minnesota
#------------------------------------------------------------------------------
# Author:
#
#  James E Johnson
#
#------------------------------------------------------------------------------
"""

import argparse
import sys

# Text types accepted for comma-separated block lists.  ``unicode`` only
# exists on Python 2; on Python 3 ``str`` already covers all text.
try:
    _STRING_TYPES = (str, unicode)  # noqa: F821
except NameError:
    _STRING_TYPES = (str,)


class BedEntry(object):
    """A single 12-column BED record.

    Constructor arguments mirror the BED column names.  ``chromStart``,
    ``chromEnd`` and ``blockCount`` are required (they are passed to
    ``int()``); the remaining fields fall back to defaults:

    * ``score`` -> 0
    * ``strand`` -> '+' unless the given value starts with '-'
    * ``thickStart`` / ``thickEnd`` -> ``chromStart`` / ``chromEnd``
    * ``itemRgb`` -> '100,100,100'

    ``blockSizes`` and ``blockStarts`` may be given as a comma-separated
    string or as a list/tuple; both forms are stored as lists of ints.
    Any other value (e.g. ``None``) is stored unchanged.
    """

    def __init__(self, chrom=None, chromStart=None, chromEnd=None,
                 name=None, score=None, strand=None,
                 thickStart=None, thickEnd=None, itemRgb=None,
                 blockCount=None, blockSizes=None, blockStarts=None):
        self.chrom = chrom
        self.chromStart = int(chromStart)
        self.chromEnd = int(chromEnd)
        self.name = name
        self.score = int(score) if score is not None else 0
        self.strand = '-' if str(strand).startswith('-') else '+'
        self.thickStart = int(thickStart) if thickStart else self.chromStart
        self.thickEnd = int(thickEnd) if thickEnd else self.chromEnd
        self.itemRgb = str(itemRgb) if itemRgb is not None else r'100,100,100'
        self.blockCount = int(blockCount)
        self.blockSizes = self._int_list(blockSizes)
        self.blockStarts = self._int_list(blockStarts)

    @staticmethod
    def _int_list(values):
        """Normalize a block list to a list of ints.

        Accepts a comma-separated string (empty items, such as the one
        produced by a trailing comma, are ignored) or a list/tuple.
        Anything else is returned as-is.
        """
        if isinstance(values, _STRING_TYPES):
            return [int(x) for x in values.split(',') if x.strip()]
        if isinstance(values, (list, tuple)):
            return [int(x) for x in values]
        return values

    def __str__(self):
        """Return the record as one tab-separated BED12 line (no newline)."""
        return '%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%s\t%s' % (
            self.chrom, self.chromStart, self.chromEnd,
            self.name, self.score, self.strand,
            self.thickStart, self.thickEnd, str(self.itemRgb),
            self.blockCount,
            ','.join([str(x) for x in self.blockSizes]),
            ','.join([str(x) for x in self.blockStarts]))


def parse_attributes(attributes):
    """Parse a GTF attribute column into a dict.

    The column is split on ';'.  Within each item the first space
    separates the key from the value, and surrounding double quotes are
    removed from the value.  Empty items (for instance after a trailing
    '; ') are skipped, and a key without a value maps to ''.
    """
    attribute = {}
    for item in attributes.split(';'):
        item = item.strip()
        if not item:
            continue
        # partition (rather than split) keeps values that contain spaces.
        key, _, value = item.partition(' ')
        attribute[key] = value.strip().strip('"')
    return attribute


def gtf_to_bed(input_rdr, output_wtr, class_codes=None, debug=False):
    """Convert GTF transcript/exon records to BED12 lines.

    input_rdr   -- iterable of GTF text lines
    output_wtr  -- object with a ``write`` method receiving BED lines
    class_codes -- optional collection of class codes; when given (and
                   non-empty) only transcripts whose ``class_code``
                   attribute is in the collection are written
    debug       -- when true, echo each transcript record to stderr

    Lines starting with '#' and lines that do not have exactly nine
    tab-separated fields are ignored.  Each ``transcript`` record starts
    a new BED entry; the ``exon`` records that follow it become its
    blocks.  A transcript without exon records is written as one block
    spanning the whole transcript.  A ``KeyError`` is raised if a
    transcript record has no ``transcript_id`` attribute.
    """

    def write_bed_entry(entry):
        # No exon was seen: keep the single whole-transcript block that
        # the entry was created with.
        if entry.blockCount == 0:
            entry.blockCount = 1
        output_wtr.write("%s\n" % str(entry))

    bed = None
    for line in input_rdr:
        if line.startswith('#'):
            continue
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) != 9:
            continue
        (seqname, source, feature, start, end,
         score, strand, frame, attributes) = fields
        if feature == 'transcript':
            attribute = parse_attributes(attributes)
            if debug:
                sys.stderr.write(
                    "%s\t%s\n"
                    % ('\t'.join([seqname, source, feature,
                                  start, end, score, strand, frame]),
                       attribute))
            # A new transcript closes the previous entry.
            if bed is not None:
                write_bed_entry(bed)
                bed = None
            class_code = attribute.get('class_code')
            # bed stays None here, so this transcript's exons are skipped.
            if class_codes and class_code not in class_codes:
                continue
            # GTF start is 1-based; BED chromStart is 0-based.
            chromStart = int(start) - 1
            chromEnd = int(end)
            cat = '_' + class_code if class_code and class_code != '=' else ''
            bed = BedEntry(chrom=seqname,
                           chromStart=chromStart, chromEnd=chromEnd,
                           name=attribute['transcript_id'] + cat,
                           strand=strand,
                           blockCount=0,
                           blockSizes=[chromEnd - chromStart],
                           blockStarts=[0])
        elif feature == 'exon' and bed is not None:
            chromStart = int(start) - 1
            chromEnd = int(end)
            if bed.blockCount == 0:
                # First exon replaces the placeholder whole-span block.
                bed.blockSizes = []
                bed.blockStarts = []
            bed.blockSizes.append(chromEnd - chromStart)
            # Block starts are relative to the transcript start.
            bed.blockStarts.append(chromStart - bed.chromStart)
            bed.blockCount += 1
    if bed is not None:
        write_bed_entry(bed)


def __main__():
    """Command-line entry point: read a GTF file and write a BED file."""
    parser = argparse.ArgumentParser(
        description='Convert a GFFCompare annotated GTF file to BED format')
    parser.add_argument(
        'input',
        help='GFFCompare annotated GTF file, (-) for stdin')
    parser.add_argument(
        'output',
        help='BED file, (-) for stdout')
    parser.add_argument(
        '-C', '--class_code', action='append', default=[],
        help='Restrict output to gffcompare class codes')
    parser.add_argument('-d', '--debug', action='store_true', help='Debug')
    args = parser.parse_args()

    # -C may be repeated and each value may itself be comma-separated.
    class_codes = [c.strip() for codes in args.class_code
                   for c in codes.split(',') if c.strip()] or None

    input_rdr = sys.stdin if args.input == '-' else open(args.input, 'r')
    try:
        output_wtr = (sys.stdout if args.output == '-'
                      else open(args.output, 'w'))
        try:
            gtf_to_bed(input_rdr, output_wtr,
                       class_codes=class_codes, debug=args.debug)
        finally:
            # Only close handles this function opened itself.
            if output_wtr is not sys.stdout:
                output_wtr.close()
    finally:
        if input_rdr is not sys.stdin:
            input_rdr.close()


if __name__ == "__main__":
    __main__()