diff aragorn_out_to_gff3.py @ 3:b86f3e5626f4 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/trna_prediction commit 1b3f92275bb60e606cd4fdc394fb9df95232a5aa
author bgruening
date Tue, 28 Oct 2025 09:15:42 +0000
parents 358f58401cd6
children
line wrap: on
line diff
--- a/aragorn_out_to_gff3.py	Wed Jul 26 10:14:05 2017 -0400
+++ b/aragorn_out_to_gff3.py	Tue Oct 28 09:15:42 2025 +0000
@@ -2,138 +2,168 @@
 import sys
 
 full_gene_model = False
-if '--full' in sys.argv:
+if "--full" in sys.argv:
     full_gene_model = True
 
 genome_id = None
 stdin_data = []
-KEY_ORDER = ('parent', 'source', 'type', 'start', 'end', 'score', 'strand',
-             '8', 'quals')
+KEY_ORDER = (
+    "parent",
+    "source",
+    "type",
+    "start",
+    "end",
+    "score",
+    "strand",
+    "8",
+    "quals",
+)
 
 # Table of amino acids
 aa_table = {
-    'Ala' : 'A',
-    'Arg' : 'R',
-    'Asn' : 'N',
-    'Asp' : 'D',
-    'Cys' : 'C',
-    'Gln' : 'Q',
-    'Glu' : 'E',
-    'Gly' : 'G',
-    'His' : 'H',
-    'Ile' : 'I',
-    'Leu' : 'L',
-    'Lys' : 'K',
-    'Met' : 'M',
-    'Phe' : 'F',
-    'Pro' : 'P',
-    'Ser' : 'S',
-    'Thr' : 'T',
-    'Trp' : 'W',
-    'Tyr' : 'Y',
-    'Val' : 'V',
-    'Pyl' : 'O',
-    'seC' : 'U',
-    '???' : 'X' }
+    "Ala": "A",
+    "Arg": "R",
+    "Asn": "N",
+    "Asp": "D",
+    "Cys": "C",
+    "Gln": "Q",
+    "Glu": "E",
+    "Gly": "G",
+    "His": "H",
+    "Ile": "I",
+    "Leu": "L",
+    "Lys": "K",
+    "Met": "M",
+    "Phe": "F",
+    "Pro": "P",
+    "Ser": "S",
+    "Thr": "T",
+    "Trp": "W",
+    "Tyr": "Y",
+    "Val": "V",
+    "Pyl": "O",
+    "seC": "U",
+    "???": "X",
+}
+
 
 def output_line(gff3):
-    print '\t'.join(str(gff3[x]) for x in KEY_ORDER)
+    print("\t".join(str(gff3[x]) for x in KEY_ORDER))
 
-print '##gff-version 3'
+
+print("##gff-version 3")
 for line in sys.stdin:
-    if line.startswith('>'):
+    if line.startswith(">"):
         genome_id = line[1:].strip()
-        if ' ' in genome_id:
-            genome_id = genome_id[0:genome_id.index(' ')]
+        if " " in genome_id:
+            genome_id = genome_id[0: genome_id.index(" ")]
     else:
         data = line.split()
         if len(data) == 5:
             # Parse data
-            strand = '-' if data[2].startswith('c') else '+'
-            start, end = data[2][data[2].index('[') + 1:-1].split(',')
+            strand = "-" if data[2].startswith("c") else "+"
+            start, end = data[2][data[2].index("[") + 1: -1].split(",")
 
             gff3 = {
-                'parent': genome_id,
-                'source': 'aragorn',
-                'start': int(start),
-                'end': int(end),
-                'strand': strand,
-                'score': '.',
-                '8': '.',
+                "parent": genome_id,
+                "source": "aragorn",
+                "start": int(start),
+                "end": int(end),
+                "strand": strand,
+                "score": ".",
+                "8": ".",
             }
 
             aa_long = data[1][5:]
             aa_short = aa_table[aa_long]
-            anticodon = data[4][1:data[4].index(")")].upper().replace("T", "U")
-            name = 'trn{}-{}'.format(aa_short, anticodon)
+            anticodon = data[4][1: data[4].index(")")].upper().replace("T", "U")
+            name = "trn{}-{}".format(aa_short, anticodon)
 
             if not full_gene_model:
-                gff3.update({
-                    'type': 'tRNA',
-                    'quals': 'ID=tRNA{0}.{1};Name={name};product={2}'.format(genome_id, *data, name = name),
-                })
+                gff3.update(
+                    {
+                        "type": "tRNA",
+                        "quals": "ID=tRNA{0}.{1};Name={name};product={2}".format(
+                            genome_id, *data, name=name
+                        ),
+                    }
+                )
                 output_line(gff3)
             else:
-                gff3.update({
-                    'type': 'gene',
-                    'quals': 'ID=gene{0}.{1};Name={name};product={2}'.format(genome_id, *data, name = name),
-                })
+                gff3.update(
+                    {
+                        "type": "gene",
+                        "quals": "ID=gene{0}.{1};Name={name};product={2}".format(
+                            genome_id, *data, name=name
+                        ),
+                    }
+                )
                 output_line(gff3)
-                gff3.update({
-                    'type': 'tRNA',
-                    'quals': 'ID=tRNA{0}.{1};Parent=gene{0}.{1};Name={name};product={2}'.format(genome_id, *data, name = name),
-                })
+                gff3.update(
+                    {
+                        "type": "tRNA",
+                        "quals": "ID=tRNA{0}.{1};Parent=gene{0}.{1};Name={name};product={2}".format(
+                            genome_id, *data, name=name
+                        ),
+                    }
+                )
                 output_line(gff3)
 
                 # If no introns
-                if ')i(' not in data[4]:
-                    gff3['type'] = 'exon'
-                    gff3['quals'] = 'Parent=tRNA{0}.{1}'.format(genome_id, *data)
+                if ")i(" not in data[4]:
+                    gff3["type"] = "exon"
+                    gff3["quals"] = "Parent=tRNA{0}.{1}".format(genome_id, *data)
                     output_line(gff3)
                 else:
-                    intron_location = data[4][data[4].rindex('(') + 1:-1].split(',')
+                    intron_location = data[4][data[4].rindex("(") + 1: -1].split(",")
                     intron_start, intron_length = map(int, intron_location)
-                    if strand == '+':
-                        original_end = gff3['end']
+                    if strand == "+":
+                        original_end = gff3["end"]
                     else:
-                        original_end = gff3['start']
+                        original_end = gff3["start"]
 
                     # EXON
-                    gff3.update({
-                        'type': 'exon',
-                        'quals': 'Parent=tRNA{0}.{1}'.format(genome_id, *data),
-                    })
-                    if strand == '+':
-                        gff3['end'] = gff3['start'] + intron_start - 2
+                    gff3.update(
+                        {
+                            "type": "exon",
+                            "quals": "Parent=tRNA{0}.{1}".format(genome_id, *data),
+                        }
+                    )
+                    if strand == "+":
+                        gff3["end"] = gff3["start"] + intron_start - 2
                     else:
-                        gff3['start'] = gff3['end'] - intron_start + 2
+                        gff3["start"] = gff3["end"] - intron_start + 2
 
                     output_line(gff3)
 
                     # INTRON
-                    gff3.update({
-                        'type': 'intron',
-                        'quals': 'Parent=tRNA{0}.{1}'.format(genome_id, *data),
-                    })
-                    if strand == '+':
-                        gff3['start'] = gff3['end'] + 1
-                        gff3['end'] = gff3['start'] + intron_length + 2
+                    gff3.update(
+                        {
+                            "type": "intron",
+                            "quals": "Parent=tRNA{0}.{1}".format(genome_id, *data),
+                        }
+                    )
+                    if strand == "+":
+                        gff3["start"] = gff3["end"] + 1
+                        gff3["end"] = gff3["start"] + intron_length + 2
                     else:
-                        gff3['end'] = gff3['start'] - 1
-                        gff3['start'] = gff3['end'] - intron_length + 1
+                        gff3["end"] = gff3["start"] - 1
+                        gff3["start"] = gff3["end"] - intron_length + 1
 
                     output_line(gff3)
 
                     # EXON
-                    gff3.update({
-                        'type': 'exon',
-                        'quals': 'Parent=tRNA{0}.{1}'.format(genome_id, *data),
-                    })
-                    if strand == '+':
-                        gff3['start'] = gff3['end'] + 1
-                        gff3['end'] = original_end
+                    gff3.update(
+                        {
+                            "type": "exon",
+                            "quals": "Parent=tRNA{0}.{1}".format(genome_id, *data),
+                        }
+                    )
+                    if strand == "+":
+                        gff3["start"] = gff3["end"] + 1
+                        gff3["end"] = original_end
                     else:
-                        gff3['end'] = gff3['start'] - 1
-                        gff3['start'] = original_end
+                        gff3["end"] = gff3["start"] - 1
+                        gff3["start"] = original_end
 
                     output_line(gff3)