changeset 3:f0f0ab9db43f draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author cpt
date Mon, 05 Jun 2023 02:42:12 +0000
parents dc22c76a57bd
children 733cb0807083
files cpt-macros.xml cpt_fix_aragorn/cpt-macros.xml cpt_fix_aragorn/fix-aragorn-gff3.py cpt_fix_aragorn/fix-aragorn-gff3.xml cpt_fix_aragorn/gff3.py cpt_fix_aragorn/macros.xml cpt_fix_aragorn/test-data/FixAra_In.gff3 cpt_fix_aragorn/test-data/FixAra_Out.gff3 fix-aragorn-gff3.py fix-aragorn-gff3.xml gff3.py macros.xml test-data/FixAra_In.gff3 test-data/FixAra_Out.gff3
diffstat 14 files changed, 641 insertions(+), 631 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt-macros.xml	Mon Jun 05 02:42:12 2023 +0000
@@ -0,0 +1,115 @@
+<macros>
+    <xml name="gff_requirements">
+        <requirements>
+            <requirement type="package" version="2.7">python</requirement>
+            <requirement type="package" version="1.65">biopython</requirement>
+            <requirement type="package" version="2.12.1">requests</requirement>
+			<requirement type="package" version="1.2.2">cpt_gffparser</requirement>
+            <yield/>
+        </requirements>
+        <version_command>
+		<![CDATA[
+			cd '$__tool_directory__' && git rev-parse HEAD
+		]]>
+		</version_command>
+    </xml>
+    <xml name="citation/mijalisrasche">
+        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+        <citation type="bibtex">@unpublished{galaxyTools,
+		author = {E. Mijalis, H. Rasche},
+		title = {CPT Galaxy Tools},
+		year = {2013-2017},
+		note = {https://github.com/tamu-cpt/galaxy-tools/}
+		}
+		</citation>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {E. Mijalis, H. Rasche},
+				title = {CPT Galaxy Tools},
+				year = {2013-2017},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-crr">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Ross},
+				title = {CPT Galaxy Tools},
+				year = {2020-},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {E. Mijalis, H. Rasche},
+				title = {CPT Galaxy Tools},
+				year = {2013-2017},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {A. Criscione},
+				title = {CPT Galaxy Tools},
+				year = {2019-2021},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020-AJC-solo">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {A. Criscione},
+				title = {CPT Galaxy Tools},
+				year = {2019-2021},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-clm">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Maughmer},
+				title = {CPT Galaxy Tools},
+				year = {2017-2020},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="sl-citations-clm">
+        <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Maughmer},
+				title = {CPT Galaxy Tools},
+				year = {2017-2020},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+        <yield/>
+    </xml>
+</macros>
--- a/cpt_fix_aragorn/cpt-macros.xml	Fri May 20 08:46:58 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-<?xml version="1.0"?>
-<macros>
-	<xml name="gff_requirements">
-		<requirements>
-			<requirement type="package" version="2.7">python</requirement>
-			<requirement type="package" version="1.65">biopython</requirement>
-			<requirement type="package" version="2.12.1">requests</requirement>
-			<yield/>
-		</requirements>
-		<version_command>
-		<![CDATA[
-			cd $__tool_directory__ && git rev-parse HEAD
-		]]>
-		</version_command>
-	</xml>
-	<xml name="citation/mijalisrasche">
-		<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-		<citation type="bibtex">@unpublished{galaxyTools,
-		author = {E. Mijalis, H. Rasche},
-		title = {CPT Galaxy Tools},
-		year = {2013-2017},
-		note = {https://github.com/tamu-cpt/galaxy-tools/}
-		}
-		</citation>
-	</xml>
-	<xml name="citations">
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-			<citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {E. Mijalis, H. Rasche},
-				title = {CPT Galaxy Tools},
-				year = {2013-2017},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-			</citation> 
-		<yield/>
-		</citations>
-	</xml>
-    	<xml name="citations-crr">
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-			<citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {C. Ross},
-				title = {CPT Galaxy Tools},
-				year = {2020-},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-			</citation>
-		<yield/>
-		</citations>
-	</xml>
-        <xml name="citations-2020">
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-			<citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {E. Mijalis, H. Rasche},
-				title = {CPT Galaxy Tools},
-				year = {2013-2017},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-			</citation>
-                        <citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {A. Criscione},
-				title = {CPT Galaxy Tools},
-				year = {2019-2021},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-                        </citation>
-                        <yield/>
-		</citations>
-	</xml>
-        <xml name="citations-2020-AJC-solo">
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-                        <citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {A. Criscione},
-				title = {CPT Galaxy Tools},
-				year = {2019-2021},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-                        </citation>
-                        <yield/>
-		</citations>
-	</xml>
-        <xml name="citations-clm">
-		<citations>
-			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
-			<citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {C. Maughmer},
-				title = {CPT Galaxy Tools},
-				year = {2017-2020},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-			</citation>
-                        <yield/>
-		</citations>
-	</xml>
-        <xml name="sl-citations-clm">
-			<citation type="bibtex">
-			@unpublished{galaxyTools,
-				author = {C. Maughmer},
-				title = {CPT Galaxy Tools},
-				year = {2017-2020},
-				note = {https://github.com/tamu-cpt/galaxy-tools/}
-			}
-			</citation>
-                        <yield/>
-	</xml>
-</macros>
--- a/cpt_fix_aragorn/fix-aragorn-gff3.py	Fri May 20 08:46:58 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,65 +0,0 @@
-#!/usr/bin/env python
-import sys
-import logging
-import argparse
-from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature
-from Bio.SeqFeature import SeqFeature
-from gff3 import feature_lambda, feature_test_type
-
-logging.basicConfig(level=logging.INFO)
-log = logging.getLogger(__name__)
-
-
-def fixed_feature(rec):
-    for idx, feature in enumerate(
-        feature_lambda(
-            rec.features, feature_test_type, {"types": ["tRNA", "tmRNA"]}, subfeatures=True
-        )
-    ):
-        
-        fid = "%s-%03d" % (feature.type, 1 + idx)
-        try:
-            name = [feature.type + "-" + feature.qualifiers["Codon"][0]]
-        except KeyError:
-            name = [feature.qualifiers['product'][0]]
-        try:
-          origSource = feature.qualifiers["source"][0]
-        except:
-          origSource = "."
-        gene = gffSeqFeature(
-            location=feature.location,
-            type="gene",
-            qualifiers={"ID": [fid + ".gene"], "source": [origSource], "Name": name},
-        )
-        feature.qualifiers["Name"] = name
-        # Below that we have an mRNA
-        exon = gffSeqFeature(
-            location=feature.location,
-            type="exon",
-            qualifiers={"source": [origSource], "ID": ["%s.exon" % fid], "Name": name},
-        )
-        feature.qualifiers["ID"] = [fid]
-        exon.qualifiers["Parent"] = [fid]
-        feature.qualifiers["Parent"] = [fid + ".gene"]
-        # gene -> trna -> exon
-        feature.sub_features = [exon]
-        gene.sub_features = [feature]
-        yield gene
-
-
-def gff_filter(gff3):
-    found_gff = False
-    for rec in gffParse(gff3):
-        found_gff = True
-        rec.features = sorted(list(fixed_feature(rec)), key=lambda x: x.location.start)
-        rec.annotations = {}
-        gffWrite([rec], sys.stdout)
-    if not found_gff:
-        print("##gff-version 3")
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="add parent gene features to CDSs")
-    parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 annotations")
-    args = parser.parse_args()
-    gff_filter(**vars(args))
--- a/cpt_fix_aragorn/fix-aragorn-gff3.xml	Fri May 20 08:46:58 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,33 +0,0 @@
-<?xml version="1.0"?>
-<tool id="edu.tamu.cpt.external.aragorn-gff3" name="Fix tRNA model" version="19.1.0.0">
-  <description></description>
-  <macros>
-    <import>macros.xml</import>
-    <import>cpt-macros.xml</import>
-  </macros>
-  <expand macro="requirements"/>
-  <command detect_errors="aggressive"><![CDATA[
-$__tool_directory__/fix-aragorn-gff3.py
-@INPUT_GFF@
-> $default]]></command>
-  <inputs>
-      <expand macro="gff3_input" />
-  </inputs>
-  <outputs>
-    <data format="gff3" name="default"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="gff3_data" value="FixAra_In.gff3"/>
-      <output name="default" file="FixAra_Out.gff3"/>
-    </test>
-  </tests>
-  <help><![CDATA[
-**What it does**
-
-For an input GFF3 file with tRNAs from the Aragorn or converted from the tRNAscan-SE tools, this tool modifies 
-the gene model to reflect a gene-tRNA-exon hierarchy. That change is needed 
-to allow for creation of proper tRNA features in Apollo.
-      ]]></help>
-		<expand macro="citations" />
-</tool>
--- a/cpt_fix_aragorn/gff3.py	Fri May 20 08:46:58 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,346 +0,0 @@
-import copy
-import logging
-
-log = logging.getLogger()
-log.setLevel(logging.WARN)
-
-
-def feature_lambda(
-    feature_list,
-    test,
-    test_kwargs,
-    subfeatures=True,
-    parent=None,
-    invert=False,
-    recurse=True,
-):
-    """Recursively search through features, testing each with a test function, yielding matches.
-
-    GFF3 is a hierachical data structure, so we need to be able to recursively
-    search through features. E.g. if you're looking for a feature with
-    ID='bob.42', you can't just do a simple list comprehension with a test
-    case. You don't know how deeply burried bob.42 will be in the feature tree. This is where feature_lambda steps in.
-
-    :type feature_list: list
-    :param feature_list: an iterable of features
-
-    :type test: function reference
-    :param test: a closure with the method signature (feature, **kwargs) where
-                 the kwargs are those passed in the next argument. This
-                 function should return True or False, True if the feature is
-                 to be yielded as part of the main feature_lambda function, or
-                 False if it is to be ignored. This function CAN mutate the
-                 features passed to it (think "apply").
-
-    :type test_kwargs: dictionary
-    :param test_kwargs: kwargs to pass to your closure when it is called.
-
-    :type subfeatures: boolean
-    :param subfeatures: when a feature is matched, should just that feature be
-                        yielded to the caller, or should the entire sub_feature
-                        tree for that feature be included? subfeatures=True is
-                        useful in cases such as searching for a gene feature,
-                        and wanting to know what RBS/Shine_Dalgarno_sequences
-                        are in the sub_feature tree (which can be accomplished
-                        with two feature_lambda calls). subfeatures=False is
-                        useful in cases when you want to process (and possibly
-                        return) the entire feature tree, such as applying a
-                        qualifier to every single feature.
-
-    :type invert: boolean
-    :param invert: Negate/invert the result of the filter.
-
-    :rtype: yielded list
-    :return: Yields a list of matching features.
-    """
-    # Either the top level set of [features] or the subfeature attribute
-    for feature in feature_list:
-        feature._parent = parent
-        if not parent:
-            # Set to self so we cannot go above root.
-            feature._parent = feature
-        test_result = test(feature, **test_kwargs)
-        # if (not invert and test_result) or (invert and not test_result):
-        if invert ^ test_result:
-            if not subfeatures:
-                feature_copy = copy.deepcopy(feature)
-                feature_copy.sub_features = list()
-                yield feature_copy
-            else:
-                yield feature
-
-        if recurse and hasattr(feature, "sub_features"):
-            for x in feature_lambda(
-                feature.sub_features,
-                test,
-                test_kwargs,
-                subfeatures=subfeatures,
-                parent=feature,
-                invert=invert,
-                recurse=recurse,
-            ):
-                yield x
-
-
-def fetchParent(feature):
-    if not hasattr(feature, "_parent") or feature._parent is None:
-        return feature
-    else:
-        return fetchParent(feature._parent)
-
-
-def feature_test_true(feature, **kwargs):
-    return True
-
-
-def feature_test_type(feature, **kwargs):
-    if "type" in kwargs:
-        return str(feature.type).upper() == str(kwargs["type"]).upper()
-    elif "types" in kwargs:
-      for x in kwargs["types"]:
-        if str(feature.type).upper() == str(x).upper():
-          return True
-      return False
-    raise Exception("Incorrect feature_test_type call, need type or types")
-
-
-def feature_test_qual_value(feature, **kwargs):
-    """Test qualifier values.
-
-    For every feature, check that at least one value in
-    feature.quailfiers(kwargs['qualifier']) is in kwargs['attribute_list']
-    """
-    if isinstance(kwargs["qualifier"], list):
-        for qualifier in kwargs["qualifier"]:
-            for attribute_value in feature.qualifiers.get(qualifier, []):
-                if attribute_value in kwargs["attribute_list"]:
-                    return True
-    else:
-        for attribute_value in feature.qualifiers.get(kwargs["qualifier"], []):
-            if attribute_value in kwargs["attribute_list"]:
-                return True
-    return False
-
-
-def feature_test_location(feature, **kwargs):
-    if "strand" in kwargs:
-        if feature.location.strand != kwargs["strand"]:
-            return False
-
-    return feature.location.start <= kwargs["loc"] <= feature.location.end
-
-
-def feature_test_quals(feature, **kwargs):
-    """
-    Example::
-
-        a = Feature(qualifiers={'Note': ['Some notes', 'Aasdf']})
-
-        # Check if a contains a Note
-        feature_test_quals(a, {'Note': None})  # Returns True
-        feature_test_quals(a, {'Product': None})  # Returns False
-
-        # Check if a contains a note with specific value
-        feature_test_quals(a, {'Note': ['ome']})  # Returns True
-
-        # Check if a contains a note with specific value
-        feature_test_quals(a, {'Note': ['other']})  # Returns False
-    """
-    for key in kwargs:
-        if key not in feature.qualifiers:
-            return False
-
-        # Key is present, no value specified
-        if kwargs[key] is None:
-            return True
-
-        # Otherwise there is a key value we're looking for.
-        # so we make a list of matches
-        matches = []
-        # And check all of the feature qualifier valuse
-        for value in feature.qualifiers[key]:
-            # For that kwargs[key] value
-            for x in kwargs[key]:
-                matches.append(x in value)
-
-        # If none matched, then we return false.
-        if not any(matches):
-            return False
-
-    return True
-
-
-def feature_test_contains(feature, **kwargs):
-    if "index" in kwargs:
-        return feature.location.start < kwargs["index"] < feature.location.end
-    elif "range" in kwargs:
-        return (
-            feature.location.start < kwargs["range"]["start"] < feature.location.end
-            and feature.location.start < kwargs["range"]["end"] < feature.location.end
-        )
-    else:
-        raise RuntimeError("Must use index or range keyword")
-
-
-def get_id(feature=None, parent_prefix=None):
-    result = ""
-    if parent_prefix is not None:
-        result += parent_prefix + "|"
-    if "locus_tag" in feature.qualifiers:
-        result += feature.qualifiers["locus_tag"][0]
-    elif "gene" in feature.qualifiers:
-        result += feature.qualifiers["gene"][0]
-    elif "Gene" in feature.qualifiers:
-        result += feature.qualifiers["Gene"][0]
-    elif "product" in feature.qualifiers:
-        result += feature.qualifiers["product"][0]
-    elif "Product" in feature.qualifiers:
-        result += feature.qualifiers["Product"][0]
-    elif "Name" in feature.qualifiers:
-        result += feature.qualifiers["Name"][0]
-    else:
-        return feature.id
-        # Leaving in case bad things happen.
-        # result += '%s_%s_%s_%s' % (
-        # feature.id,
-        # feature.location.start,
-        # feature.location.end,
-        # feature.location.strand
-        # )
-    return result
-
-
-def get_gff3_id(gene):
-    return gene.qualifiers.get("Name", [gene.id])[0]
-
-
-def ensure_location_in_bounds(start=0, end=0, parent_length=0):
-    # This prevents frameshift errors
-    while start < 0:
-        start += 3
-    while end < 0:
-        end += 3
-    while start > parent_length:
-        start -= 3
-    while end > parent_length:
-        end -= 3
-    return (start, end)
-
-
-def coding_genes(feature_list):
-    for x in genes(feature_list):
-        if (
-            len(
-                list(
-                    feature_lambda(
-                        x.sub_features,
-                        feature_test_type,
-                        {"type": "CDS"},
-                        subfeatures=False,
-                    )
-                )
-            )
-            > 0
-        ):
-            yield x
-
-
-def genes(feature_list, feature_type="gene", sort=False):
-    """
-    Simple filter to extract gene features from the feature set.
-    """
-
-    if not sort:
-        for x in feature_lambda(
-            feature_list, feature_test_type, {"type": feature_type}, subfeatures=True
-        ):
-            yield x
-    else:
-        data = list(genes(feature_list, feature_type=feature_type, sort=False))
-        data = sorted(data, key=lambda feature: feature.location.start)
-        for x in data:
-            yield x
-
-
-def wa_unified_product_name(feature):
-    """
-    Try and figure out a name. We gave conflicting instructions, so
-    this isn't as trivial as it should be. Sometimes it will be in
-    'product' or 'Product', othertimes in 'Name'
-    """
-    # Manually applied tags.
-    protein_product = feature.qualifiers.get(
-        "product", feature.qualifiers.get("Product", [None])
-    )[0]
-
-    # If neither of those are available ...
-    if protein_product is None:
-        # And there's a name...
-        if "Name" in feature.qualifiers:
-            if not is_uuid(feature.qualifiers["Name"][0]):
-                protein_product = feature.qualifiers["Name"][0]
-
-    return protein_product
-
-
-def is_uuid(name):
-    return name.count("-") == 4 and len(name) == 36
-
-
-def get_rbs_from(gene):
-    # Normal RBS annotation types
-    rbs_rbs = list(
-        feature_lambda(
-            gene.sub_features, feature_test_type, {"type": "RBS"}, subfeatures=False
-        )
-    )
-    rbs_sds = list(
-        feature_lambda(
-            gene.sub_features,
-            feature_test_type,
-            {"type": "Shine_Dalgarno_sequence"},
-            subfeatures=False,
-        )
-    )
-    # Fraking apollo
-    apollo_exons = list(
-        feature_lambda(
-            gene.sub_features, feature_test_type, {"type": "exon"}, subfeatures=False
-        )
-    )
-    apollo_exons = [x for x in apollo_exons if len(x) < 10]
-    # These are more NCBI's style
-    regulatory_elements = list(
-        feature_lambda(
-            gene.sub_features,
-            feature_test_type,
-            {"type": "regulatory"},
-            subfeatures=False,
-        )
-    )
-    rbs_regulatory = list(
-        feature_lambda(
-            regulatory_elements,
-            feature_test_quals,
-            {"regulatory_class": ["ribosome_binding_site"]},
-            subfeatures=False,
-        )
-    )
-    # Here's hoping you find just one ;)
-    return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons
-
-
-def nice_name(record):
-    """
-    get the real name rather than NCBI IDs and so on. If fails, will return record.id
-    """
-    name = record.id
-    likely_parental_contig = list(genes(record.features, feature_type="contig"))
-    if len(likely_parental_contig) == 1:
-        name = likely_parental_contig[0].qualifiers.get("organism", [name])[0]
-    return name
-
-
-def fsort(it):
-    for i in sorted(it, key=lambda x: int(x.location.start)):
-        yield i
--- a/cpt_fix_aragorn/macros.xml	Fri May 20 08:46:58 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,66 +0,0 @@
-<?xml version="1.0"?>
-<macros>
-	<xml name="requirements">
-		<requirements>
-			<requirement type="package" version="3.8.13">python</requirement>
-			<requirement type="package" version="1.79">biopython</requirement>
-			<requirement type="package" version="1.2.2">cpt_gffparser</requirement>  
-			<yield/>
-		</requirements>
-	</xml>
-	<xml name="genome_selector">
-		<conditional name="reference_genome">
-			<param name="reference_genome_source" type="select" label="Reference Genome">
-				<option value="history" selected="True">From History</option>
-				<option value="cached">Locally Cached</option>
-			</param>
-			<when value="cached">
-				<param name="fasta_indexes" type="select" label="Source FASTA Sequence">
-					<options from_data_table="all_fasta"/>
-				</param>
-			</when>
-			<when value="history">
-				<param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
-			</when>
-		</conditional>
-	</xml>
-	<xml name="gff3_input">
-		<param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
-	</xml>
-	<xml name="input/gff3+fasta">
-		<expand macro="gff3_input" />
-		<expand macro="genome_selector" />
-	</xml>
-	<token name="@INPUT_GFF@">
-	"$gff3_data"
-	</token>
-	<token name="@INPUT_FASTA@">
-#if str($reference_genome.reference_genome_source) == 'cached':
-		"${reference_genome.fasta_indexes.fields.path}"
-#else if str($reference_genome.reference_genome_source) == 'history':
-		genomeref.fa
-#end if
-	</token>
-	<token name="@GENOME_SELECTOR_PRE@">
-#if $reference_genome.reference_genome_source == 'history':
-		ln -s $reference_genome.genome_fasta genomeref.fa;
-#end if
-	</token>
-	<token name="@GENOME_SELECTOR@">
-#if str($reference_genome.reference_genome_source) == 'cached':
-		"${reference_genome.fasta_indexes.fields.path}"
-#else if str($reference_genome.reference_genome_source) == 'history':
-		genomeref.fa
-#end if
-	</token>
-        <xml name="input/fasta">
-		<param label="Fasta file" name="sequences" type="data" format="fasta"/>
-	</xml>
-
-	<token name="@SEQUENCE@">
-		"$sequences"
-	</token>
-	<xml name="input/fasta/protein">
-		<param label="Protein fasta file" name="sequences" type="data" format="fasta"/>
-	</xml>
-</macros>
--- a/cpt_fix_aragorn/test-data/FixAra_In.gff3	Fri May 20 08:46:58 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-##gff-version-3
-Phriendly	aragorn	tRNA	48610	48685	.	-	.	Anticodon="tcc";Codon="Gly"
--- a/cpt_fix_aragorn/test-data/FixAra_Out.gff3	Fri May 20 08:46:58 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-##gff-version 3
-Phriendly	feature	gene	48610	48685	.	-	.	ID=tRNA-001.gene;source=.;Name=tRNA-"Gly";
-Phriendly	aragorn	tRNA	48610	48685	.	-	.	Anticodon="tcc";Codon="Gly";Name=tRNA-"Gly";ID=tRNA-001;Parent=tRNA-001.gene;
-Phriendly	feature	exon	48610	48685	.	-	.	source=.;ID=tRNA-001.exon;Name=tRNA-"Gly";Parent=tRNA-001;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fix-aragorn-gff3.py	Mon Jun 05 02:42:12 2023 +0000
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+import sys
+import logging
+import argparse
+from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature
+from Bio.SeqFeature import SeqFeature
+from gff3 import feature_lambda, feature_test_type
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger(__name__)
+
+
+def fixed_feature(rec):
+    for idx, feature in enumerate(
+        feature_lambda(
+            rec.features,
+            feature_test_type,
+            {"types": ["tRNA", "tmRNA"]},
+            subfeatures=True,
+        )
+    ):
+
+        fid = "%s-%03d" % (feature.type, 1 + idx)
+        try:
+            name = [feature.type + "-" + feature.qualifiers["Codon"][0]]
+        except KeyError:
+            name = [feature.qualifiers["product"][0]]
+        try:
+            origSource = feature.qualifiers["source"][0]
+        except:
+            origSource = "."
+        gene = gffSeqFeature(
+            location=feature.location,
+            type="gene",
+            qualifiers={"ID": [fid + ".gene"], "source": [origSource], "Name": name},
+        )
+        feature.qualifiers["Name"] = name
+        # Below that we have an mRNA
+        exon = gffSeqFeature(
+            location=feature.location,
+            type="exon",
+            qualifiers={"source": [origSource], "ID": ["%s.exon" % fid], "Name": name},
+        )
+        feature.qualifiers["ID"] = [fid]
+        exon.qualifiers["Parent"] = [fid]
+        feature.qualifiers["Parent"] = [fid + ".gene"]
+        # gene -> trna -> exon
+        feature.sub_features = [exon]
+        gene.sub_features = [feature]
+        yield gene
+
+
+def gff_filter(gff3):
+    found_gff = False
+    for rec in gffParse(gff3):
+        found_gff = True
+        rec.features = sorted(list(fixed_feature(rec)), key=lambda x: x.location.start)
+        rec.annotations = {}
+        gffWrite([rec], sys.stdout)
+    if not found_gff:
+        print("##gff-version 3")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="add parent gene features to CDSs")
+    parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 annotations")
+    args = parser.parse_args()
+    gff_filter(**vars(args))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fix-aragorn-gff3.xml	Mon Jun 05 02:42:12 2023 +0000
@@ -0,0 +1,32 @@
+<tool id="edu.tamu.cpt.external.aragorn-gff3" name="Fix tRNA model" version="19.1.0.0">
+  <description/>
+  <macros>
+    <import>macros.xml</import>
+    <import>cpt-macros.xml</import>
+  </macros>
+  <expand macro="requirements"/>
+  <command detect_errors="aggressive"><![CDATA[
+'$__tool_directory__/fix-aragorn-gff3.py'
+@INPUT_GFF@
+> '$default']]></command>
+  <inputs>
+    <expand macro="gff3_input"/>
+  </inputs>
+  <outputs>
+    <data format="gff3" name="default"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="gff3_data" value="FixAra_In.gff3"/>
+      <output name="default" file="FixAra_Out.gff3"/>
+    </test>
+  </tests>
+  <help><![CDATA[
+**What it does**
+
+For an input GFF3 file with tRNAs from the Aragorn or converted from the tRNAscan-SE tools, this tool modifies 
+the gene model to reflect a gene-tRNA-exon hierarchy. That change is needed 
+to allow for creation of proper tRNA features in Apollo.
+      ]]></help>
+  <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gff3.py	Mon Jun 05 02:42:12 2023 +0000
@@ -0,0 +1,346 @@
+import copy
+import logging
+
+log = logging.getLogger()
+log.setLevel(logging.WARN)
+
+
+def feature_lambda(
+    feature_list,
+    test,
+    test_kwargs,
+    subfeatures=True,
+    parent=None,
+    invert=False,
+    recurse=True,
+):
+    """Recursively search through features, testing each with a test function, yielding matches.
+
+    GFF3 is a hierachical data structure, so we need to be able to recursively
+    search through features. E.g. if you're looking for a feature with
+    ID='bob.42', you can't just do a simple list comprehension with a test
+    case. You don't know how deeply burried bob.42 will be in the feature tree. This is where feature_lambda steps in.
+
+    :type feature_list: list
+    :param feature_list: an iterable of features
+
+    :type test: function reference
+    :param test: a closure with the method signature (feature, **kwargs) where
+                 the kwargs are those passed in the next argument. This
+                 function should return True or False, True if the feature is
+                 to be yielded as part of the main feature_lambda function, or
+                 False if it is to be ignored. This function CAN mutate the
+                 features passed to it (think "apply").
+
+    :type test_kwargs: dictionary
+    :param test_kwargs: kwargs to pass to your closure when it is called.
+
+    :type subfeatures: boolean
+    :param subfeatures: when a feature is matched, should just that feature be
+                        yielded to the caller, or should the entire sub_feature
+                        tree for that feature be included? subfeatures=True is
+                        useful in cases such as searching for a gene feature,
+                        and wanting to know what RBS/Shine_Dalgarno_sequences
+                        are in the sub_feature tree (which can be accomplished
+                        with two feature_lambda calls). subfeatures=False is
+                        useful in cases when you want to process (and possibly
+                        return) the entire feature tree, such as applying a
+                        qualifier to every single feature.
+
+    :type invert: boolean
+    :param invert: Negate/invert the result of the filter.
+
+    :rtype: yielded list
+    :return: Yields a list of matching features.
+    """
+    # Either the top level set of [features] or the subfeature attribute
+    for feature in feature_list:
+        feature._parent = parent
+        if not parent:
+            # Set to self so we cannot go above root.
+            feature._parent = feature
+        test_result = test(feature, **test_kwargs)
+        # if (not invert and test_result) or (invert and not test_result):
+        if invert ^ test_result:
+            if not subfeatures:
+                feature_copy = copy.deepcopy(feature)
+                feature_copy.sub_features = list()
+                yield feature_copy
+            else:
+                yield feature
+
+        if recurse and hasattr(feature, "sub_features"):
+            for x in feature_lambda(
+                feature.sub_features,
+                test,
+                test_kwargs,
+                subfeatures=subfeatures,
+                parent=feature,
+                invert=invert,
+                recurse=recurse,
+            ):
+                yield x
+
+
+def fetchParent(feature):
+    if not hasattr(feature, "_parent") or feature._parent is None:
+        return feature
+    else:
+        return fetchParent(feature._parent)
+
+
+def feature_test_true(feature, **kwargs):
+    return True
+
+
+def feature_test_type(feature, **kwargs):
+    if "type" in kwargs:
+        return str(feature.type).upper() == str(kwargs["type"]).upper()
+    elif "types" in kwargs:
+        for x in kwargs["types"]:
+            if str(feature.type).upper() == str(x).upper():
+                return True
+        return False
+    raise Exception("Incorrect feature_test_type call, need type or types")
+
+
+def feature_test_qual_value(feature, **kwargs):
+    """Test qualifier values.
+
+    For every feature, check that at least one value in
+    feature.quailfiers(kwargs['qualifier']) is in kwargs['attribute_list']
+    """
+    if isinstance(kwargs["qualifier"], list):
+        for qualifier in kwargs["qualifier"]:
+            for attribute_value in feature.qualifiers.get(qualifier, []):
+                if attribute_value in kwargs["attribute_list"]:
+                    return True
+    else:
+        for attribute_value in feature.qualifiers.get(kwargs["qualifier"], []):
+            if attribute_value in kwargs["attribute_list"]:
+                return True
+    return False
+
+
+def feature_test_location(feature, **kwargs):
+    if "strand" in kwargs:
+        if feature.location.strand != kwargs["strand"]:
+            return False
+
+    return feature.location.start <= kwargs["loc"] <= feature.location.end
+
+
+def feature_test_quals(feature, **kwargs):
+    """
+    Example::
+
+        a = Feature(qualifiers={'Note': ['Some notes', 'Aasdf']})
+
+        # Check if a contains a Note
+        feature_test_quals(a, {'Note': None})  # Returns True
+        feature_test_quals(a, {'Product': None})  # Returns False
+
+        # Check if a contains a note with specific value
+        feature_test_quals(a, {'Note': ['ome']})  # Returns True
+
+        # Check if a contains a note with specific value
+        feature_test_quals(a, {'Note': ['other']})  # Returns False
+    """
+    for key in kwargs:
+        if key not in feature.qualifiers:
+            return False
+
+        # Key is present, no value specified
+        if kwargs[key] is None:
+            return True
+
+        # Otherwise there is a key value we're looking for.
+        # so we make a list of matches
+        matches = []
+        # And check all of the feature qualifier valuse
+        for value in feature.qualifiers[key]:
+            # For that kwargs[key] value
+            for x in kwargs[key]:
+                matches.append(x in value)
+
+        # If none matched, then we return false.
+        if not any(matches):
+            return False
+
+    return True
+
+
+def feature_test_contains(feature, **kwargs):
+    if "index" in kwargs:
+        return feature.location.start < kwargs["index"] < feature.location.end
+    elif "range" in kwargs:
+        return (
+            feature.location.start < kwargs["range"]["start"] < feature.location.end
+            and feature.location.start < kwargs["range"]["end"] < feature.location.end
+        )
+    else:
+        raise RuntimeError("Must use index or range keyword")
+
+
+def get_id(feature=None, parent_prefix=None):
+    result = ""
+    if parent_prefix is not None:
+        result += parent_prefix + "|"
+    if "locus_tag" in feature.qualifiers:
+        result += feature.qualifiers["locus_tag"][0]
+    elif "gene" in feature.qualifiers:
+        result += feature.qualifiers["gene"][0]
+    elif "Gene" in feature.qualifiers:
+        result += feature.qualifiers["Gene"][0]
+    elif "product" in feature.qualifiers:
+        result += feature.qualifiers["product"][0]
+    elif "Product" in feature.qualifiers:
+        result += feature.qualifiers["Product"][0]
+    elif "Name" in feature.qualifiers:
+        result += feature.qualifiers["Name"][0]
+    else:
+        return feature.id
+        # Leaving in case bad things happen.
+        # result += '%s_%s_%s_%s' % (
+        # feature.id,
+        # feature.location.start,
+        # feature.location.end,
+        # feature.location.strand
+        # )
+    return result
+
+
+def get_gff3_id(gene):
+    return gene.qualifiers.get("Name", [gene.id])[0]
+
+
+def ensure_location_in_bounds(start=0, end=0, parent_length=0):
+    # This prevents frameshift errors
+    while start < 0:
+        start += 3
+    while end < 0:
+        end += 3
+    while start > parent_length:
+        start -= 3
+    while end > parent_length:
+        end -= 3
+    return (start, end)
+
+
+def coding_genes(feature_list):
+    for x in genes(feature_list):
+        if (
+            len(
+                list(
+                    feature_lambda(
+                        x.sub_features,
+                        feature_test_type,
+                        {"type": "CDS"},
+                        subfeatures=False,
+                    )
+                )
+            )
+            > 0
+        ):
+            yield x
+
+
+def genes(feature_list, feature_type="gene", sort=False):
+    """
+    Simple filter to extract gene features from the feature set.
+    """
+
+    if not sort:
+        for x in feature_lambda(
+            feature_list, feature_test_type, {"type": feature_type}, subfeatures=True
+        ):
+            yield x
+    else:
+        data = list(genes(feature_list, feature_type=feature_type, sort=False))
+        data = sorted(data, key=lambda feature: feature.location.start)
+        for x in data:
+            yield x
+
+
+def wa_unified_product_name(feature):
+    """
+    Try and figure out a name. We gave conflicting instructions, so
+    this isn't as trivial as it should be. Sometimes it will be in
+    'product' or 'Product', othertimes in 'Name'
+    """
+    # Manually applied tags.
+    protein_product = feature.qualifiers.get(
+        "product", feature.qualifiers.get("Product", [None])
+    )[0]
+
+    # If neither of those are available ...
+    if protein_product is None:
+        # And there's a name...
+        if "Name" in feature.qualifiers:
+            if not is_uuid(feature.qualifiers["Name"][0]):
+                protein_product = feature.qualifiers["Name"][0]
+
+    return protein_product
+
+
+def is_uuid(name):
+    return name.count("-") == 4 and len(name) == 36
+
+
+def get_rbs_from(gene):
+    # Normal RBS annotation types
+    rbs_rbs = list(
+        feature_lambda(
+            gene.sub_features, feature_test_type, {"type": "RBS"}, subfeatures=False
+        )
+    )
+    rbs_sds = list(
+        feature_lambda(
+            gene.sub_features,
+            feature_test_type,
+            {"type": "Shine_Dalgarno_sequence"},
+            subfeatures=False,
+        )
+    )
+    # Fraking apollo
+    apollo_exons = list(
+        feature_lambda(
+            gene.sub_features, feature_test_type, {"type": "exon"}, subfeatures=False
+        )
+    )
+    apollo_exons = [x for x in apollo_exons if len(x) < 10]
+    # These are more NCBI's style
+    regulatory_elements = list(
+        feature_lambda(
+            gene.sub_features,
+            feature_test_type,
+            {"type": "regulatory"},
+            subfeatures=False,
+        )
+    )
+    rbs_regulatory = list(
+        feature_lambda(
+            regulatory_elements,
+            feature_test_quals,
+            {"regulatory_class": ["ribosome_binding_site"]},
+            subfeatures=False,
+        )
+    )
+    # Here's hoping you find just one ;)
+    return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons
+
+
+def nice_name(record):
+    """
+    get the real name rather than NCBI IDs and so on. If fails, will return record.id
+    """
+    name = record.id
+    likely_parental_contig = list(genes(record.features, feature_type="contig"))
+    if len(likely_parental_contig) == 1:
+        name = likely_parental_contig[0].qualifiers.get("organism", [name])[0]
+    return name
+
+
+def fsort(it):
+    for i in sorted(it, key=lambda x: int(x.location.start)):
+        yield i
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Mon Jun 05 02:42:12 2023 +0000
@@ -0,0 +1,74 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package">progressivemauve</requirement>
+            <!--<requirement type="package" version="2.7">python</requirement>-->
+            <requirement type="package" version="0.6.4">bcbiogff</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <token name="@WRAPPER_VERSION@">2.4.0</token>
+    <xml name="citation/progressive_mauve">
+        <citation type="doi">10.1371/journal.pone.0011147</citation>
+    </xml>
+    <xml name="citation/gepard">
+        <citation type="doi">10.1093/bioinformatics/btm039</citation>
+    </xml>
+    <token name="@XMFA_INPUT@">
+		'$xmfa'
+	</token>
+    <xml name="xmfa_input" token_formats="xmfa">
+        <param type="data" format="@FORMATS@" name="xmfa" label="XMFA MSA"/>
+    </xml>
+    <token name="@XMFA_FA_INPUT@">
+		'$sequences'
+	</token>
+    <xml name="xmfa_fa_input">
+        <param type="data" format="fasta" name="sequences" label="Sequences in alignment" help="These sequences should be the SAME DATASET that was used in the progressiveMauve run. Failing that, they should be provided in the same order as in original progressiveMauve run"/>
+    </xml>
+    <xml name="genome_selector">
+        <conditional name="reference_genome">
+            <param name="reference_genome_source" type="select" label="Reference Genome">
+                <option value="history" selected="True">From History</option>
+                <option value="cached">Locally Cached</option>
+            </param>
+            <when value="cached">
+                <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
+                    <options from_data_table="all_fasta"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
+            </when>
+        </conditional>
+    </xml>
+    <xml name="gff3_input">
+        <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
+    </xml>
+    <xml name="input/gff3+fasta">
+        <expand macro="gff3_input"/>
+        <expand macro="genome_selector"/>
+    </xml>
+    <token name="@INPUT_GFF@">
+	    '$gff3_data'
+	</token>
+    <token name="@INPUT_FASTA@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+	</token>
+    <token name="@GENOME_SELECTOR_PRE@">
+    #if $reference_genome.reference_genome_source == 'history':
+            ln -s '$reference_genome.genome_fasta' genomeref.fa;
+    #end if
+	</token>
+    <token name="@GENOME_SELECTOR@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+	</token>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/FixAra_In.gff3	Mon Jun 05 02:42:12 2023 +0000
@@ -0,0 +1,2 @@
+##gff-version-3
+Phriendly	aragorn	tRNA	48610	48685	.	-	.	Anticodon="tcc";Codon="Gly"
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/FixAra_Out.gff3	Mon Jun 05 02:42:12 2023 +0000
@@ -0,0 +1,4 @@
+##gff-version 3
+Phriendly	feature	gene	48610	48685	.	-	.	ID=tRNA-001.gene;source=.;Name=tRNA-"Gly";
+Phriendly	aragorn	tRNA	48610	48685	.	-	.	Anticodon="tcc";Codon="Gly";Name=tRNA-"Gly";ID=tRNA-001;Parent=tRNA-001.gene;
+Phriendly	feature	exon	48610	48685	.	-	.	source=.;ID=tRNA-001.exon;Name=tRNA-"Gly";Parent=tRNA-001;