comparison gffcompare_to_bed.py @ 0:7e572e148175 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/gffcompare_to_bed commit 321b217382f6be33bd77c7dbb51c8caf5fa50afe
author galaxyp
date Thu, 11 Jan 2018 11:16:51 -0500
parents
children 9a4cfc910674
comparison
equal deleted inserted replaced
-1:000000000000 0:7e572e148175
1 #!/usr/bin/env python
2 """
3 #
4 #------------------------------------------------------------------------------
5 # University of Minnesota
6 # Copyright 2017, Regents of the University of Minnesota
7 #------------------------------------------------------------------------------
8 # Author:
9 #
10 # James E Johnson
11 #
12 #------------------------------------------------------------------------------
13 """
14
15 import argparse
16 import sys
17
18
try:
    # Python 2: text may arrive as either ``str`` or ``unicode``.
    _STRING_TYPES = (str, unicode)  # noqa: F821
except NameError:
    # Python 3: ``unicode`` no longer exists; ``str`` covers all text.
    _STRING_TYPES = (str,)


class BedEntry(object):
    """A single 12-column BED record.

    Constructor arguments are coerced as follows:

    * ``chromStart``, ``chromEnd`` and ``blockCount`` are passed through
      ``int()`` and are therefore required (``int(None)`` raises
      ``TypeError``).
    * ``score`` defaults to ``0`` when ``None``.
    * ``strand`` becomes ``'-'`` only if its string form starts with ``'-'``;
      anything else (including ``None``) becomes ``'+'``.
    * ``thickStart`` / ``thickEnd`` fall back to ``chromStart`` / ``chromEnd``
      when falsy (``None``, ``0`` or ``''``).
    * ``itemRgb`` defaults to ``'100,100,100'`` when ``None``.
    * ``blockSizes`` / ``blockStarts`` may be a comma-separated string or a
      list; both are converted to a list of ints.  Any other value (for
      example ``None``) is stored unchanged.
    """

    def __init__(self, chrom=None, chromStart=None, chromEnd=None,
                 name=None, score=None, strand=None,
                 thickStart=None, thickEnd=None, itemRgb=None,
                 blockCount=None, blockSizes=None, blockStarts=None):
        self.chrom = chrom
        self.chromStart = int(chromStart)
        self.chromEnd = int(chromEnd)
        self.name = name
        self.score = int(score) if score is not None else 0
        self.strand = '-' if str(strand).startswith('-') else '+'
        self.thickStart = int(thickStart) if thickStart else self.chromStart
        self.thickEnd = int(thickEnd) if thickEnd else self.chromEnd
        self.itemRgb = str(itemRgb) if itemRgb is not None else r'100,100,100'
        self.blockCount = int(blockCount)
        # Both block lists go through the same helper so the type checks
        # cannot drift apart (the original tested ``blockSizes`` while
        # handling ``blockStarts``).
        self.blockSizes = self._as_int_list(blockSizes)
        self.blockStarts = self._as_int_list(blockStarts)

    @staticmethod
    def _as_int_list(value):
        """Return ``value`` as a list of ints when it is a string or a list.

        A string is split on commas; empty items are skipped so the trailing
        comma often found in BED block columns (``"10,20,"``) is accepted.
        Values of any other type are returned unchanged.
        """
        if isinstance(value, _STRING_TYPES):
            return [int(x) for x in value.split(',') if x.strip()]
        if isinstance(value, list):
            return [int(x) for x in value]
        return value

    def __str__(self):
        """Return the record as one tab-separated, 12-column BED line."""
        return '%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%s\t%s' % (
            self.chrom, self.chromStart, self.chromEnd,
            self.name, self.score, self.strand,
            self.thickStart, self.thickEnd, str(self.itemRgb), self.blockCount,
            ','.join([str(x) for x in self.blockSizes]),
            ','.join([str(x) for x in self.blockStarts]))
54
55
def _parse_attributes(attributes):
    """Parse a GTF attribute column into a ``{key: value}`` dict.

    Segments are separated by ``;``.  Within a segment the key is the text up
    to the first space and the value is the remainder with surrounding
    whitespace and double quotes removed.  Empty segments (for example the
    one produced by a trailing ``"; "``) are skipped, and a key without a
    value maps to ``''``.
    """
    parsed = {}
    for segment in attributes.split(';'):
        segment = segment.strip()
        if not segment:
            continue
        key, _, value = segment.partition(' ')
        parsed[key] = value.strip().strip('"')
    return parsed


def _write_bed_entry(bed, output_wtr):
    """Write ``bed`` as one line to ``output_wtr``.

    A transcript that collected no exon lines still has ``blockCount == 0``;
    it is emitted as a single block spanning the whole transcript.
    """
    if bed.blockCount == 0:
        bed.blockCount = 1
    output_wtr.write("%s\n" % str(bed))


def _convert(input_rdr, output_wtr, class_codes, debug):
    """Stream GTF lines from ``input_rdr`` and write BED lines.

    Each ``transcript`` line starts a new BED entry and flushes the previous
    one; subsequent ``exon`` lines add blocks to the current entry.  When
    ``class_codes`` is non-empty, transcripts whose ``class_code`` attribute
    is not listed are dropped together with their exons.

    Raises:
        KeyError: a kept transcript line has no ``transcript_id`` attribute.
        ValueError: a start or end column is not an integer.
    """
    bed = None
    for line in input_rdr:
        if line.startswith('#'):
            continue
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) != 9:
            # Not a well-formed GTF record; ignore it.
            continue
        (seqname, source, feature, start, end,
         score, strand, frame, attributes) = fields
        if feature == 'transcript':
            # Attributes are only needed for transcript lines.
            attribute = _parse_attributes(attributes)
            if debug:
                sys.stderr.write("%s\t%s\n" % (
                    '\t'.join([seqname, source, feature,
                               start, end, score, strand, frame]),
                    attribute))
            if bed is not None:
                _write_bed_entry(bed, output_wtr)
                bed = None
            class_code = attribute.get('class_code')
            if class_codes and class_code not in class_codes:
                # bed stays None, so this transcript's exons are skipped too.
                continue
            # GTF coordinates are 1-based inclusive; BED is 0-based half-open.
            chromStart = int(start) - 1
            chromEnd = int(end)
            # Suffix the name with the class code, except for exact
            # matches ('=').
            cat = '_' + class_code if class_code and class_code != '=' else ''
            bed = BedEntry(chrom=seqname,
                           chromStart=chromStart, chromEnd=chromEnd,
                           name=attribute['transcript_id'] + cat,
                           strand=strand,
                           blockCount=0,
                           blockSizes=[chromEnd - chromStart],
                           blockStarts=[0])
        elif feature == 'exon' and bed is not None:
            chromStart = int(start) - 1
            chromEnd = int(end)
            if bed.blockCount == 0:
                # First exon: discard the whole-transcript placeholder block.
                bed.blockSizes = []
                bed.blockStarts = []
            bed.blockSizes.append(chromEnd - chromStart)
            bed.blockStarts.append(chromStart - bed.chromStart)
            bed.blockCount += 1
    if bed is not None:
        _write_bed_entry(bed, output_wtr)


def __main__():
    """Command-line entry point: convert a GFFCompare GTF file to BED.

    Reads the positional ``input`` and ``output`` paths (``-`` selects
    stdin / stdout) and the optional ``-C/--class_code`` filters from
    ``sys.argv``.  Files opened here are closed before returning, including
    when the conversion raises.
    """
    parser = argparse.ArgumentParser(
        description='Convert a GFFCompare annotated GTF file to BED')
    parser.add_argument(
        'input',
        help='GFFCompare annotated GTF file, (-) for stdin')
    parser.add_argument(
        'output',
        help='BED file, (-) for stdout')
    parser.add_argument(
        '-C', '--class_code', action='append', default=[],
        help='Restrict output to gffcompare class codes')
    parser.add_argument('-d', '--debug', action='store_true', help='Debug')
    args = parser.parse_args()

    # -C may be repeated and each occurrence may hold a comma-separated list.
    class_codes = [c.strip() for codes in args.class_code
                   for c in codes.split(',')] if args.class_code else None

    input_rdr = open(args.input, 'r') if args.input != '-' else sys.stdin
    try:
        output_wtr = (open(args.output, 'w')
                      if args.output != '-' else sys.stdout)
        try:
            _convert(input_rdr, output_wtr, class_codes, args.debug)
        finally:
            # Never close the interpreter's own stdout.
            if output_wtr is not sys.stdout:
                output_wtr.close()
    finally:
        if input_rdr is not sys.stdin:
            input_rdr.close()
129
130
# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    __main__()