Mercurial > repos > bgruening > trna_prediction
diff aragorn_out_to_gff3.py @ 3:b86f3e5626f4 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/trna_prediction commit 1b3f92275bb60e606cd4fdc394fb9df95232a5aa
| author | bgruening |
|---|---|
| date | Tue, 28 Oct 2025 09:15:42 +0000 |
| parents | 358f58401cd6 |
| children | |
line wrap: on
line diff
--- a/aragorn_out_to_gff3.py Wed Jul 26 10:14:05 2017 -0400 +++ b/aragorn_out_to_gff3.py Tue Oct 28 09:15:42 2025 +0000 @@ -2,138 +2,168 @@ import sys full_gene_model = False -if '--full' in sys.argv: +if "--full" in sys.argv: full_gene_model = True genome_id = None stdin_data = [] -KEY_ORDER = ('parent', 'source', 'type', 'start', 'end', 'score', 'strand', - '8', 'quals') +KEY_ORDER = ( + "parent", + "source", + "type", + "start", + "end", + "score", + "strand", + "8", + "quals", +) # Table of amino acids aa_table = { - 'Ala' : 'A', - 'Arg' : 'R', - 'Asn' : 'N', - 'Asp' : 'D', - 'Cys' : 'C', - 'Gln' : 'Q', - 'Glu' : 'E', - 'Gly' : 'G', - 'His' : 'H', - 'Ile' : 'I', - 'Leu' : 'L', - 'Lys' : 'K', - 'Met' : 'M', - 'Phe' : 'F', - 'Pro' : 'P', - 'Ser' : 'S', - 'Thr' : 'T', - 'Trp' : 'W', - 'Tyr' : 'Y', - 'Val' : 'V', - 'Pyl' : 'O', - 'seC' : 'U', - '???' : 'X' } + "Ala": "A", + "Arg": "R", + "Asn": "N", + "Asp": "D", + "Cys": "C", + "Gln": "Q", + "Glu": "E", + "Gly": "G", + "His": "H", + "Ile": "I", + "Leu": "L", + "Lys": "K", + "Met": "M", + "Phe": "F", + "Pro": "P", + "Ser": "S", + "Thr": "T", + "Trp": "W", + "Tyr": "Y", + "Val": "V", + "Pyl": "O", + "seC": "U", + "???": "X", +} + def output_line(gff3): - print '\t'.join(str(gff3[x]) for x in KEY_ORDER) + print("\t".join(str(gff3[x]) for x in KEY_ORDER)) -print '##gff-version 3' + +print("##gff-version 3") for line in sys.stdin: - if line.startswith('>'): + if line.startswith(">"): genome_id = line[1:].strip() - if ' ' in genome_id: - genome_id = genome_id[0:genome_id.index(' ')] + if " " in genome_id: + genome_id = genome_id[0: genome_id.index(" ")] else: data = line.split() if len(data) == 5: # Parse data - strand = '-' if data[2].startswith('c') else '+' - start, end = data[2][data[2].index('[') + 1:-1].split(',') + strand = "-" if data[2].startswith("c") else "+" + start, end = data[2][data[2].index("[") + 1: -1].split(",") gff3 = { - 'parent': genome_id, - 'source': 'aragorn', - 'start': int(start), - 'end': 
int(end), - 'strand': strand, - 'score': '.', - '8': '.', + "parent": genome_id, + "source": "aragorn", + "start": int(start), + "end": int(end), + "strand": strand, + "score": ".", + "8": ".", } aa_long = data[1][5:] aa_short = aa_table[aa_long] - anticodon = data[4][1:data[4].index(")")].upper().replace("T", "U") - name = 'trn{}-{}'.format(aa_short, anticodon) + anticodon = data[4][1: data[4].index(")")].upper().replace("T", "U") + name = "trn{}-{}".format(aa_short, anticodon) if not full_gene_model: - gff3.update({ - 'type': 'tRNA', - 'quals': 'ID=tRNA{0}.{1};Name={name};product={2}'.format(genome_id, *data, name = name), - }) + gff3.update( + { + "type": "tRNA", + "quals": "ID=tRNA{0}.{1};Name={name};product={2}".format( + genome_id, *data, name=name + ), + } + ) output_line(gff3) else: - gff3.update({ - 'type': 'gene', - 'quals': 'ID=gene{0}.{1};Name={name};product={2}'.format(genome_id, *data, name = name), - }) + gff3.update( + { + "type": "gene", + "quals": "ID=gene{0}.{1};Name={name};product={2}".format( + genome_id, *data, name=name + ), + } + ) output_line(gff3) - gff3.update({ - 'type': 'tRNA', - 'quals': 'ID=tRNA{0}.{1};Parent=gene{0}.{1};Name={name};product={2}'.format(genome_id, *data, name = name), - }) + gff3.update( + { + "type": "tRNA", + "quals": "ID=tRNA{0}.{1};Parent=gene{0}.{1};Name={name};product={2}".format( + genome_id, *data, name=name + ), + } + ) output_line(gff3) # If no introns - if ')i(' not in data[4]: - gff3['type'] = 'exon' - gff3['quals'] = 'Parent=tRNA{0}.{1}'.format(genome_id, *data) + if ")i(" not in data[4]: + gff3["type"] = "exon" + gff3["quals"] = "Parent=tRNA{0}.{1}".format(genome_id, *data) output_line(gff3) else: - intron_location = data[4][data[4].rindex('(') + 1:-1].split(',') + intron_location = data[4][data[4].rindex("(") + 1: -1].split(",") intron_start, intron_length = map(int, intron_location) - if strand == '+': - original_end = gff3['end'] + if strand == "+": + original_end = gff3["end"] else: - original_end = 
gff3['start'] + original_end = gff3["start"] # EXON - gff3.update({ - 'type': 'exon', - 'quals': 'Parent=tRNA{0}.{1}'.format(genome_id, *data), - }) - if strand == '+': - gff3['end'] = gff3['start'] + intron_start - 2 + gff3.update( + { + "type": "exon", + "quals": "Parent=tRNA{0}.{1}".format(genome_id, *data), + } + ) + if strand == "+": + gff3["end"] = gff3["start"] + intron_start - 2 else: - gff3['start'] = gff3['end'] - intron_start + 2 + gff3["start"] = gff3["end"] - intron_start + 2 output_line(gff3) # INTRON - gff3.update({ - 'type': 'intron', - 'quals': 'Parent=tRNA{0}.{1}'.format(genome_id, *data), - }) - if strand == '+': - gff3['start'] = gff3['end'] + 1 - gff3['end'] = gff3['start'] + intron_length + 2 + gff3.update( + { + "type": "intron", + "quals": "Parent=tRNA{0}.{1}".format(genome_id, *data), + } + ) + if strand == "+": + gff3["start"] = gff3["end"] + 1 + gff3["end"] = gff3["start"] + intron_length + 2 else: - gff3['end'] = gff3['start'] - 1 - gff3['start'] = gff3['end'] - intron_length + 1 + gff3["end"] = gff3["start"] - 1 + gff3["start"] = gff3["end"] - intron_length + 1 output_line(gff3) # EXON - gff3.update({ - 'type': 'exon', - 'quals': 'Parent=tRNA{0}.{1}'.format(genome_id, *data), - }) - if strand == '+': - gff3['start'] = gff3['end'] + 1 - gff3['end'] = original_end + gff3.update( + { + "type": "exon", + "quals": "Parent=tRNA{0}.{1}".format(genome_id, *data), + } + ) + if strand == "+": + gff3["start"] = gff3["end"] + 1 + gff3["end"] = original_end else: - gff3['end'] = gff3['start'] - 1 - gff3['start'] = original_end + gff3["end"] = gff3["start"] - 1 + gff3["start"] = original_end output_line(gff3)
