Mercurial > repos > bgruening > trna_prediction
view aragorn_out_to_gff3.py @ 3:b86f3e5626f4 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/trna_prediction commit 1b3f92275bb60e606cd4fdc394fb9df95232a5aa
| author | bgruening | 
|---|---|
| date | Tue, 28 Oct 2025 09:15:42 +0000 | 
| parents | 358f58401cd6 | 
| children | 
line wrap: on
 line source
#!/usr/bin/env python import sys full_gene_model = False if "--full" in sys.argv: full_gene_model = True genome_id = None stdin_data = [] KEY_ORDER = ( "parent", "source", "type", "start", "end", "score", "strand", "8", "quals", ) # Table of amino acids aa_table = { "Ala": "A", "Arg": "R", "Asn": "N", "Asp": "D", "Cys": "C", "Gln": "Q", "Glu": "E", "Gly": "G", "His": "H", "Ile": "I", "Leu": "L", "Lys": "K", "Met": "M", "Phe": "F", "Pro": "P", "Ser": "S", "Thr": "T", "Trp": "W", "Tyr": "Y", "Val": "V", "Pyl": "O", "seC": "U", "???": "X", } def output_line(gff3): print("\t".join(str(gff3[x]) for x in KEY_ORDER)) print("##gff-version 3") for line in sys.stdin: if line.startswith(">"): genome_id = line[1:].strip() if " " in genome_id: genome_id = genome_id[0: genome_id.index(" ")] else: data = line.split() if len(data) == 5: # Parse data strand = "-" if data[2].startswith("c") else "+" start, end = data[2][data[2].index("[") + 1: -1].split(",") gff3 = { "parent": genome_id, "source": "aragorn", "start": int(start), "end": int(end), "strand": strand, "score": ".", "8": ".", } aa_long = data[1][5:] aa_short = aa_table[aa_long] anticodon = data[4][1: data[4].index(")")].upper().replace("T", "U") name = "trn{}-{}".format(aa_short, anticodon) if not full_gene_model: gff3.update( { "type": "tRNA", "quals": "ID=tRNA{0}.{1};Name={name};product={2}".format( genome_id, *data, name=name ), } ) output_line(gff3) else: gff3.update( { "type": "gene", "quals": "ID=gene{0}.{1};Name={name};product={2}".format( genome_id, *data, name=name ), } ) output_line(gff3) gff3.update( { "type": "tRNA", "quals": "ID=tRNA{0}.{1};Parent=gene{0}.{1};Name={name};product={2}".format( genome_id, *data, name=name ), } ) output_line(gff3) # If no introns if ")i(" not in data[4]: gff3["type"] = "exon" gff3["quals"] = "Parent=tRNA{0}.{1}".format(genome_id, *data) output_line(gff3) else: intron_location = data[4][data[4].rindex("(") + 1: -1].split(",") intron_start, intron_length = map(int, intron_location) if strand == "+": original_end = gff3["end"] else: original_end = gff3["start"] # EXON gff3.update( { "type": "exon", "quals": "Parent=tRNA{0}.{1}".format(genome_id, *data), } ) if strand == "+": gff3["end"] = gff3["start"] + intron_start - 2 else: gff3["start"] = gff3["end"] - intron_start + 2 output_line(gff3) # INTRON gff3.update( { "type": "intron", "quals": "Parent=tRNA{0}.{1}".format(genome_id, *data), } ) if strand == "+": gff3["start"] = gff3["end"] + 1 gff3["end"] = gff3["start"] + intron_length + 2 else: gff3["end"] = gff3["start"] - 1 gff3["start"] = gff3["end"] - intron_length + 1 output_line(gff3) # EXON gff3.update( { "type": "exon", "quals": "Parent=tRNA{0}.{1}".format(genome_id, *data), } ) if strand == "+": gff3["start"] = gff3["end"] + 1 gff3["end"] = original_end else: gff3["end"] = gff3["start"] - 1 gff3["start"] = original_end output_line(gff3)
