Mercurial > repos > galaxyp > gffcompare_to_bed
comparison gffcompare_to_bed.py @ 0:7e572e148175 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/gffcompare_to_bed commit 321b217382f6be33bd77c7dbb51c8caf5fa50afe
author | galaxyp |
---|---|
date | Thu, 11 Jan 2018 11:16:51 -0500 |
parents | |
children | 9a4cfc910674 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7e572e148175 |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 # | |
4 #------------------------------------------------------------------------------ | |
5 # University of Minnesota | |
6 # Copyright 2017, Regents of the University of Minnesota | |
7 #------------------------------------------------------------------------------ | |
8 # Author: | |
9 # | |
10 # James E Johnson | |
11 # | |
12 #------------------------------------------------------------------------------ | |
13 """ | |
14 | |
15 import argparse | |
16 import sys | |
17 | |
18 | |
class BedEntry(object):
    """A single 12-column BED record.

    Coordinates and counts are coerced with ``int()``, so numeric strings
    are accepted.  ``chromStart``, ``chromEnd`` and ``blockCount`` are
    required in practice: leaving them as ``None`` raises ``TypeError``.

    Defaults applied by the constructor:
      * ``score``      -> 0 when ``None``
      * ``strand``     -> '-' only if its string form starts with '-',
                          otherwise '+'
      * ``thickStart`` -> ``chromStart`` when falsy (``None``, '' or 0)
      * ``thickEnd``   -> ``chromEnd`` when falsy (``None``, '' or 0)
      * ``itemRgb``    -> '100,100,100' when ``None``

    ``blockSizes`` and ``blockStarts`` may each be a comma-separated string
    or a list of int-convertible items; both are stored as a new list of
    ints.  Any other value (including ``None``) is stored unchanged.
    """

    # Text types accepted for comma-separated block lists.  ``type(u'')`` is
    # ``unicode`` on Python 2 and ``str`` on Python 3, so the Python 2-only
    # name ``unicode`` is never referenced directly.
    _string_types = (str, type(u''))

    def __init__(self, chrom=None, chromStart=None, chromEnd=None,
                 name=None, score=None, strand=None,
                 thickStart=None, thickEnd=None, itemRgb=None,
                 blockCount=None, blockSizes=None, blockStarts=None):
        self.chrom = chrom
        self.chromStart = int(chromStart)
        self.chromEnd = int(chromEnd)
        self.name = name
        self.score = int(score) if score is not None else 0
        self.strand = '-' if str(strand).startswith('-') else '+'
        self.thickStart = int(thickStart) if thickStart else self.chromStart
        self.thickEnd = int(thickEnd) if thickEnd else self.chromEnd
        self.itemRgb = str(itemRgb) if itemRgb is not None else r'100,100,100'
        self.blockCount = int(blockCount)
        # Both block lists go through the same helper so that each one is
        # type-checked against its own value.
        self.blockSizes = self._to_int_list(blockSizes)
        self.blockStarts = self._to_int_list(blockStarts)

    @classmethod
    def _to_int_list(cls, value):
        """Return ``value`` as a list of ints when it is a string or a list.

        A string is split on commas; one or more trailing commas (as in
        '10,20,') are ignored.  A list is copied with every item converted
        by ``int()``.  Anything else is returned unchanged.
        """
        if isinstance(value, cls._string_types):
            return [int(x) for x in value.strip().rstrip(',').split(',')]
        if isinstance(value, list):
            return [int(x) for x in value]
        return value

    def __str__(self):
        """Format the entry as one tab-separated, 12-field BED line
        (without a trailing newline)."""
        return '%s\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%s\t%d\t%s\t%s' % (
            self.chrom, self.chromStart, self.chromEnd,
            self.name, self.score, self.strand,
            self.thickStart, self.thickEnd, str(self.itemRgb), self.blockCount,
            ','.join([str(x) for x in self.blockSizes]),
            ','.join([str(x) for x in self.blockStarts]))
54 | |
55 | |
def _parse_gtf_attributes(attributes):
    """Parse the GTF attributes column into a ``{name: value}`` dict.

    The column is split on ';'.  Each non-empty chunk is split at its first
    space into a name and a value, and surrounding whitespace and double
    quotes are removed from the value.  Empty chunks (e.g. after a trailing
    "; ") are skipped and a name without a value maps to ''.

    NOTE: a ';' inside a quoted value is still treated as a separator.
    """
    parsed = {}
    for chunk in attributes.split(';'):
        chunk = chunk.strip()
        if not chunk:
            continue
        key, _, value = chunk.partition(' ')
        parsed[key] = value.strip().strip('"')
    return parsed


def _write_bed_entry(bed, output_wtr):
    """Write ``bed`` as one line to ``output_wtr``.

    A transcript that collected no exon lines still has ``blockCount == 0``
    together with the single whole-transcript block it was created with, so
    the count is set to 1 before writing.
    """
    if bed.blockCount == 0:
        bed.blockCount = 1
    output_wtr.write("%s\n" % str(bed))


def _gtf_to_bed(input_rdr, output_wtr, class_codes=None, debug=False):
    """Convert GTF lines read from ``input_rdr`` into BED lines.

    Each 'transcript' line starts a new BED entry and each following 'exon'
    line adds one block to the current entry.  Lines starting with '#' and
    lines without exactly 9 tab-separated fields are ignored.

    input_rdr   -- iterable of GTF text lines
    output_wtr  -- object with a ``write`` method receiving the BED lines
    class_codes -- optional collection of class codes; when non-empty,
                   transcripts whose ``class_code`` attribute is not in it
                   are dropped together with their exons
    debug       -- when true, echo every transcript line to stderr

    Raises ``KeyError`` if a kept transcript has no ``transcript_id``.
    """
    bed = None
    for line in input_rdr:
        if line.startswith('#'):
            continue
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) != 9:
            continue
        (seqname, source, feature, start, end,
         score, strand, frame, attributes) = fields
        attribute = _parse_gtf_attributes(attributes)
        if feature == 'transcript':
            if debug:
                sys.stderr.write("%s\t%s\n" % (
                    '\t'.join([seqname, source, feature,
                               start, end, score, strand, frame]),
                    attribute))
            # A new transcript line finishes the previous entry, if any.
            if bed is not None:
                _write_bed_entry(bed, output_wtr)
                bed = None
            class_code = attribute.get('class_code')
            if class_codes and class_code not in class_codes:
                # bed stays None, so this transcript's exons are skipped.
                continue
            # GTF start is 1-based; BED chromStart is 0-based.
            chromStart = int(start) - 1
            chromEnd = int(end)
            # The class code is appended to the name, except for '='.
            cat = '_' + class_code if class_code and class_code != '=' else ''
            bed = BedEntry(chrom=seqname,
                           chromStart=chromStart, chromEnd=chromEnd,
                           name=attribute['transcript_id'] + cat,
                           strand=strand,
                           blockCount=0,
                           blockSizes=[chromEnd - chromStart],
                           blockStarts=[0])
        elif feature == 'exon' and bed is not None:
            chromStart = int(start) - 1
            chromEnd = int(end)
            if bed.blockCount == 0:
                # First exon: discard the placeholder whole-transcript block.
                bed.blockSizes = []
                bed.blockStarts = []
            bed.blockSizes.append(chromEnd - chromStart)
            # Block starts are relative to the transcript start.
            bed.blockStarts.append(chromStart - bed.chromStart)
            bed.blockCount += 1
    # Flush the last transcript of the input.
    if bed is not None:
        _write_bed_entry(bed, output_wtr)


def __main__():
    """Command-line entry point: convert a GFFCompare annotated GTF to BED.

    Positional arguments are the input GTF path and the output BED path;
    '-' selects stdin / stdout respectively.  ``-C/--class_code`` may be
    repeated and may hold comma-separated codes; when given, only
    transcripts with one of those class codes are written.  ``-d/--debug``
    echoes transcript lines to stderr.
    """
    parser = argparse.ArgumentParser(
        description='Convert a GFFCompare annotated GTF file to BED')
    parser.add_argument(
        'input',
        help='GFFCompare annotated GTF file, (-) for stdin')
    parser.add_argument(
        'output',
        help='BED file, (-) for stdout')
    parser.add_argument(
        '-C', '--class_code', action='append', default=[],
        help='Restrict output to gffcompare class codes')
    parser.add_argument('-d', '--debug', action='store_true', help='Debug')
    args = parser.parse_args()

    # Every -C value may itself be a comma-separated list of codes.
    class_codes = set(c.strip() for codes in args.class_code
                      for c in codes.split(',')) if args.class_code else None

    # Files opened here are always closed again; stdin/stdout are left open.
    input_rdr = open(args.input, 'r') if args.input != '-' else sys.stdin
    try:
        output_wtr = (open(args.output, 'w')
                      if args.output != '-' else sys.stdout)
        try:
            _gtf_to_bed(input_rdr, output_wtr, class_codes, args.debug)
        finally:
            if output_wtr is not sys.stdout:
                output_wtr.close()
    finally:
        if input_rdr is not sys.stdin:
            input_rdr.close()
129 | |
130 | |
# Run the converter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    __main__()