# HG changeset patch
# User cpt
# Date 1655471111 0
# Node ID 6f4c463131172d9ed33e485a5cb80394fb32d2c5
Uploaded
diff -r 000000000000 -r 6f4c46313117 cpt_promote_qualifiers/cpt-macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_promote_qualifiers/cpt-macros.xml Fri Jun 17 13:05:11 2022 +0000
@@ -0,0 +1,115 @@
+
+
+
+
+ python
+ biopython
+ requests
+
+
+
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+ @unpublished{galaxyTools,
+ author = {E. Mijalis and H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+
+ @unpublished{galaxyTools,
+ author = {E. Mijalis and H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+
+ @unpublished{galaxyTools,
+ author = {C. Ross},
+ title = {CPT Galaxy Tools},
+ year = {2020-},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+
+ @unpublished{galaxyTools,
+ author = {E. Mijalis and H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+
+
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
diff -r 000000000000 -r 6f4c46313117 cpt_promote_qualifiers/gff3.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_promote_qualifiers/gff3.py Fri Jun 17 13:05:11 2022 +0000
@@ -0,0 +1,346 @@
+import copy
+import logging
+
+# Module-wide logger. NOTE(review): getLogger() without a name returns the
+# ROOT logger, so the setLevel() call below changes the level of the whole
+# process as a side effect of importing this module -- confirm that this is
+# intended rather than logging.getLogger(__name__).
+log = logging.getLogger()
+log.setLevel(logging.WARN)
+
+
+def feature_lambda(
+    feature_list,
+    test,
+    test_kwargs,
+    subfeatures=True,
+    parent=None,
+    invert=False,
+    recurse=True,
+):
+    """Recursively search through features, yielding those that pass a test.
+
+    GFF3 is a hierarchical data structure, so we need to be able to
+    recursively search through features. E.g. if you're looking for a feature
+    with ID='bob.42', you can't just do a simple list comprehension with a
+    test case. You don't know how deeply buried bob.42 will be in the feature
+    tree. This is where feature_lambda steps in.
+
+    As a side effect every visited feature gets a ``_parent`` attribute: the
+    feature it was reached from, or the feature itself for top-level features.
+
+    :type feature_list: list
+    :param feature_list: an iterable of features
+
+    :type test: function reference
+    :param test: a closure with the method signature (feature, **kwargs) where
+                 the kwargs are those passed in the next argument. Its result
+                 is interpreted by truthiness: truthy if the feature is to be
+                 yielded, falsy if it is to be ignored. This function CAN
+                 mutate the features passed to it (think "apply").
+
+    :type test_kwargs: dictionary
+    :param test_kwargs: kwargs to pass to your closure when it is called.
+
+    :type subfeatures: boolean
+    :param subfeatures: when True the matched feature itself is yielded, with
+                        its sub_feature tree attached. When False a deep copy
+                        of the match with an empty ``sub_features`` list is
+                        yielded instead, so changes made to it do not reach
+                        the original feature.
+
+    :type parent: feature
+    :param parent: the feature whose sub_features are being searched; used
+                   internally by the recursion, leave as None for top-level
+                   calls.
+
+    :type invert: boolean
+    :param invert: Negate/invert the result of the filter.
+
+    :type recurse: boolean
+    :param recurse: when False only ``feature_list`` itself is tested and the
+                    sub_features are not descended into.
+
+    :rtype: generator
+    :return: Yields the matching features one by one.
+    """
+    # Either the top level set of [features] or the subfeature attribute
+    for feature in feature_list:
+        # Top-level features point at themselves so that walking up the
+        # ``_parent`` chain can never go above the root. ``is None`` is used
+        # (not truthiness) so a parent that happens to be falsy still counts.
+        feature._parent = feature if parent is None else parent
+
+        # Coerce to bool before comparing: ``invert ^ result`` raised a
+        # TypeError for None/str results and gave wrong answers for ints
+        # other than 0 and 1.
+        matched = bool(test(feature, **test_kwargs))
+        if matched != bool(invert):
+            if subfeatures:
+                yield feature
+            else:
+                feature_copy = copy.deepcopy(feature)
+                feature_copy.sub_features = list()
+                yield feature_copy
+
+        if recurse and hasattr(feature, "sub_features"):
+            for x in feature_lambda(
+                feature.sub_features,
+                test,
+                test_kwargs,
+                subfeatures=subfeatures,
+                parent=feature,
+                invert=invert,
+                recurse=recurse,
+            ):
+                yield x
+
+
+def fetchParent(feature):
+    """Walk up the ``_parent`` chain and return the top-most feature.
+
+    The walk stops at a feature that has no ``_parent`` attribute, whose
+    ``_parent`` is None, or whose ``_parent`` is the feature itself.
+    feature_lambda marks top-level features by pointing ``_parent`` at the
+    feature itself, so the self-reference check is what ends the walk there.
+
+    :param feature: any object, optionally carrying a ``_parent`` attribute
+    :return: the root of the chain (``feature`` itself if it has no parent)
+    """
+    current = feature
+    while True:
+        above = getattr(current, "_parent", None)
+        if above is None or above is current:
+            return current
+        current = above
+
+
+def feature_test_true(feature, **kwargs):
+    """Test function for feature_lambda that accepts every feature.
+
+    Both the feature and any keyword arguments are ignored.
+    """
+    return True
+
+
+def feature_test_type(feature, **kwargs):
+    """Check a feature's type against ``type`` or ``types``, ignoring case.
+
+    :param feature: object with a ``type`` attribute
+    :param kwargs: either ``type`` (a single type name) or ``types`` (an
+                   iterable of type names); ``type`` wins when both are given
+    :return: True if the feature's type equals one of the requested names
+    :raises Exception: if neither ``type`` nor ``types`` is supplied
+    """
+    if "type" in kwargs:
+        candidates = [kwargs["type"]]
+    elif "types" in kwargs:
+        candidates = kwargs["types"]
+    else:
+        raise Exception("Incorrect feature_test_type call, need type or types")
+
+    return any(
+        str(feature.type).upper() == str(candidate).upper()
+        for candidate in candidates
+    )
+
+
+def feature_test_qual_value(feature, **kwargs):
+    """Test qualifier values.
+
+    Returns True when at least one value stored under
+    feature.qualifiers[kwargs['qualifier']] is contained in
+    kwargs['attribute_list']. kwargs['qualifier'] may be a single qualifier
+    name or a list of names; with a list, a hit under any name is enough.
+    """
+    requested = kwargs["qualifier"]
+    names = requested if isinstance(requested, list) else [requested]
+    return any(
+        attribute_value in kwargs["attribute_list"]
+        for name in names
+        for attribute_value in feature.qualifiers.get(name, [])
+    )
+
+
+def feature_test_location(feature, **kwargs):
+    """Check whether position ``loc`` lies within the feature's location.
+
+    Both ends are inclusive. When ``strand`` is supplied, a feature whose
+    location strand differs is rejected outright.
+    """
+    wrong_strand = (
+        "strand" in kwargs and feature.location.strand != kwargs["strand"]
+    )
+    if wrong_strand:
+        return False
+    return feature.location.start <= kwargs["loc"] <= feature.location.end
+
+
+def feature_test_quals(feature, **kwargs):
+    """Check that a feature carries all of the requested qualifiers.
+
+    Each keyword names a qualifier that must be present on the feature. A
+    value of None only requires presence; otherwise the value is a list of
+    substrings, at least one of which must occur in at least one of the
+    feature's values for that qualifier. Every keyword has to pass.
+
+    Example::
+
+        a = Feature(qualifiers={'Note': ['Some notes', 'Aasdf']})
+
+        # Check if a contains a Note
+        feature_test_quals(a, **{'Note': None})  # Returns True
+        feature_test_quals(a, **{'Product': None})  # Returns False
+
+        # Check if a contains a note with specific value
+        feature_test_quals(a, **{'Note': ['ome']})  # Returns True
+
+        # Check if a contains a note with specific value
+        feature_test_quals(a, **{'Note': ['other']})  # Returns False
+
+    :return: True if every requested qualifier matches (also when no
+             qualifier is requested), False otherwise
+    """
+    for key, wanted in kwargs.items():
+        if key not in feature.qualifiers:
+            return False
+
+        # Key is present and no value was requested: this key passes, but the
+        # remaining keys still have to be checked (returning True here used to
+        # skip them).
+        if wanted is None:
+            continue
+
+        # Otherwise at least one requested substring must occur in at least
+        # one of the feature's values for this qualifier.
+        if not any(x in value for value in feature.qualifiers[key] for x in wanted):
+            return False
+
+    return True
+
+
+def feature_test_contains(feature, **kwargs):
+    """Check that a position or a range lies strictly inside the feature.
+
+    :param kwargs: either ``index`` (a single position) or ``range`` (a
+                   mapping with ``start`` and ``end``); ``index`` wins when
+                   both are given. Positions equal to the feature's own start
+                   or end do not count as contained.
+    :raises RuntimeError: if neither ``index`` nor ``range`` is supplied
+    """
+
+    def strictly_inside(position):
+        return feature.location.start < position < feature.location.end
+
+    if "index" in kwargs:
+        return strictly_inside(kwargs["index"])
+    if "range" in kwargs:
+        span = kwargs["range"]
+        return strictly_inside(span["start"]) and strictly_inside(span["end"])
+    raise RuntimeError("Must use index or range keyword")
+
+
+def get_id(feature=None, parent_prefix=None):
+    """Build an identifier for a feature from its qualifiers.
+
+    The first value of the first qualifier found, in the order locus_tag,
+    gene, Gene, product, Product, Name, is used and prefixed with
+    ``parent_prefix + "|"`` when a prefix is given.
+
+    :return: the (optionally prefixed) qualifier value, or ``feature.id``
+             when none of those qualifiers is present
+    """
+    prefix = "" if parent_prefix is None else parent_prefix + "|"
+
+    for key in ("locus_tag", "gene", "Gene", "product", "Product", "Name"):
+        if key in feature.qualifiers:
+            return prefix + feature.qualifiers[key][0]
+
+    # No usable qualifier: fall back to the bare feature id. Note that the
+    # prefix is NOT applied on this path.
+    return feature.id
+
+
+def get_gff3_id(gene):
+    """Return the first ``Name`` qualifier value, or ``gene.id`` if unnamed."""
+    names = gene.qualifiers.get("Name", [gene.id])
+    return names[0]
+
+
+def ensure_location_in_bounds(start=0, end=0, parent_length=0):
+    """Shift ``start`` and ``end`` in steps of 3 towards [0, parent_length].
+
+    Each coordinate is first raised by 3 until it is no longer negative and
+    then lowered by 3 until it no longer exceeds ``parent_length``. Moving in
+    whole codons keeps the reading frame intact (prevents frameshift errors).
+
+    :return: tuple ``(start, end)`` of the adjusted coordinates
+    """
+
+    def shift_by_codons(position):
+        while position < 0:
+            position += 3
+        while position > parent_length:
+            position -= 3
+        return position
+
+    return (shift_by_codons(start), shift_by_codons(end))
+
+
+def coding_genes(feature_list):
+    """Yield the gene features that have at least one CDS beneath them.
+
+    Genes come from genes(); for each one its sub_feature tree is searched
+    with feature_lambda for features of type CDS.
+    """
+    for candidate in genes(feature_list):
+        cds_hits = list(
+            feature_lambda(
+                candidate.sub_features,
+                feature_test_type,
+                {"type": "CDS"},
+                subfeatures=False,
+            )
+        )
+        if cds_hits:
+            yield candidate
+
+
+def genes(feature_list, feature_type="gene", sort=False):
+    """
+    Simple filter to extract features of one type (gene by default) from the
+    feature set, searching the whole feature tree.
+
+    With sort=True all matches are collected first and then yielded ordered
+    by location start; otherwise they are yielded in the order found.
+    """
+    matches = feature_lambda(
+        feature_list, feature_test_type, {"type": feature_type}, subfeatures=True
+    )
+    if sort:
+        matches = sorted(matches, key=lambda feature: feature.location.start)
+
+    for x in matches:
+        yield x
+
+
+def wa_unified_product_name(feature):
+    """
+    Try and figure out a product name for a feature. We gave conflicting
+    instructions, so this isn't as trivial as it should be: sometimes it will
+    be in 'product' or 'Product', other times in 'Name'.
+
+    Returns the first 'product' value, else the first 'Product' value, else
+    the first 'Name' value unless is_uuid() flags that name; None when
+    nothing usable is found.
+    """
+    quals = feature.qualifiers
+
+    # Manually applied tags; lowercase 'product' wins over 'Product'.
+    capitalised = quals.get("Product", [None])
+    protein_product = quals.get("product", capitalised)[0]
+
+    # Neither tag is available: fall back to a name that is not a UUID.
+    if protein_product is None and "Name" in quals:
+        candidate = quals["Name"][0]
+        if not is_uuid(candidate):
+            protein_product = candidate
+
+    return protein_product
+
+
+def is_uuid(name):
+    """Cheap shape check: exactly four dashes and a length of 36.
+
+    Only the dash count and the length are inspected, not the characters in
+    between.
+    """
+    has_four_dashes = name.count("-") == 4
+    return has_four_dashes and len(name) == 36
+
+
+def get_rbs_from(gene):
+    """Collect the features under a gene that look like a ribosome binding site.
+
+    Four sources are searched in the gene's sub_feature tree: features of
+    type RBS, features of type Shine_Dalgarno_sequence, features of type
+    regulatory whose regulatory_class contains 'ribosome_binding_site', and
+    exon features shorter than 10.
+
+    :return: list of matching features (copies without sub_features), in the
+             order RBS, Shine_Dalgarno_sequence, regulatory, short exons
+    """
+
+    def children_of_type(feature_type):
+        return list(
+            feature_lambda(
+                gene.sub_features,
+                feature_test_type,
+                {"type": feature_type},
+                subfeatures=False,
+            )
+        )
+
+    # Normal RBS annotation types
+    rbs_rbs = children_of_type("RBS")
+    rbs_sds = children_of_type("Shine_Dalgarno_sequence")
+    # Apollo-style annotations: presumably the RBS shows up there as a very
+    # short exon, so only exons shorter than 10 are kept.
+    apollo_exons = [x for x in children_of_type("exon") if len(x) < 10]
+    # These are more NCBI's style
+    regulatory_elements = children_of_type("regulatory")
+    rbs_regulatory = list(
+        feature_lambda(
+            regulatory_elements,
+            feature_test_quals,
+            {"regulatory_class": ["ribosome_binding_site"]},
+            subfeatures=False,
+        )
+    )
+    # Ideally exactly one feature is found, but all candidates are returned.
+    return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons
+
+
+def nice_name(record):
+    """
+    Get the real name of a record rather than NCBI IDs and so on.
+
+    When the record holds exactly one feature of type 'contig', the first
+    value of its 'organism' qualifier is returned. In every other case, or
+    when that qualifier is absent, record.id is returned.
+    """
+    fallback = record.id
+    contigs = list(genes(record.features, feature_type="contig"))
+    if len(contigs) != 1:
+        return fallback
+    return contigs[0].qualifiers.get("organism", [fallback])[0]
+
+
+def fsort(it):
+    """Yield the features of ``it`` ordered by integer location start."""
+
+    def start_position(feature):
+        return int(feature.location.start)
+
+    ordered = sorted(it, key=start_position)
+    for i in ordered:
+        yield i
diff -r 000000000000 -r 6f4c46313117 cpt_promote_qualifiers/macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_promote_qualifiers/macros.xml Fri Jun 17 13:05:11 2022 +0000
@@ -0,0 +1,85 @@
+
+
+
+
+ python
+ biopython
+ cpt_gffparser
+
+
+
+
+ "$blast_tsv"
+
+
+
+
+
+
+ "$blast_xml"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ "$gff3_data"
+
+
+#if str($reference_genome.reference_genome_source) == 'cached':
+ "${reference_genome.fasta_indexes.fields.path}"
+#else if str($reference_genome.reference_genome_source) == 'history':
+ genomeref.fa
+#end if
+
+
+#if $reference_genome.reference_genome_source == 'history':
+ ln -s $reference_genome.genome_fasta genomeref.fa;
+#end if
+
+
+#if str($reference_genome.reference_genome_source) == 'cached':
+ "${reference_genome.fasta_indexes.fields.path}"
+#else if str($reference_genome.reference_genome_source) == 'history':
+ genomeref.fa
+#end if
+
+
+
+
+
+
+ "$sequences"
+
+
+
+
+
diff -r 000000000000 -r 6f4c46313117 cpt_promote_qualifiers/promote_qualifier.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_promote_qualifiers/promote_qualifier.py Fri Jun 17 13:05:11 2022 +0000
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+import argparse
+import sys
+import logging
+from CPT_GFFParser import gffParse, gffWrite
+from gff3 import feature_lambda, feature_test_type
+
+# Send log records of level INFO and above to the default handler (stderr),
+# and create a logger named after this module.
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger(__name__)
+
+
+def promote_qualifier(qualifier, parent, child, gff3):
+    """Copy a qualifier from the first child feature onto its parent feature.
+
+    For every feature of type ``parent`` found in the records of ``gff3``,
+    all features of type ``child`` beneath it are collected and the first
+    one in reading direction is picked: the lowest start for parents on the
+    forward strand, the highest end for parents on the reverse strand.
+    Parents with no strand or an unknown strand are treated as forward.
+    The child's value for ``qualifier`` then replaces the parent's. Parents
+    without such a child, and children without the qualifier, are reported
+    with a warning and left unchanged.
+
+    Each record is written to standard output as GFF3 once its features have
+    been processed.
+
+    :type qualifier: str
+    :param qualifier: name of the qualifier to promote (ex: Name, product)
+
+    :type parent: str
+    :param parent: feature type of the features receiving the qualifier
+
+    :type child: str
+    :param child: feature type of the features providing the qualifier
+
+    :param gff3: GFF3 input handed to gffParse (an open file when run as a
+                 script)
+    """
+    for record in gffParse(gff3):
+        for parent_feature in feature_lambda(
+            record.features, feature_test_type, {"type": parent}, subfeatures=True
+        ):
+            # .get() so that a feature without an ID can still be reported.
+            parent_id = parent_feature.qualifiers.get("ID")
+
+            # subfeatures=False hands back copies, which is fine: the
+            # children are only read from, never modified.
+            children = list(
+                feature_lambda(
+                    parent_feature.sub_features,
+                    feature_test_type,
+                    {"type": child},
+                    subfeatures=False,
+                )
+            )
+            if not children:
+                logging.warning(
+                    "Child type %s not found under parent %s", child, parent_id
+                )
+                continue
+
+            # The strand is None for unstranded features; comparing None with
+            # 0 raises a TypeError, so it is checked explicitly.
+            strand = parent_feature.location.strand
+            if strand is not None and strand < 0:
+                # Reverse strand reads right to left: first child ends last.
+                first_child = max(children, key=lambda x: x.location.end)
+            else:
+                first_child = min(children, key=lambda x: x.location.start)
+
+            child_id = first_child.qualifiers.get("ID")
+            if qualifier not in first_child.qualifiers:
+                logging.warning(
+                    "Qualifier %s not found in child feature %s", qualifier, child_id
+                )
+                continue
+
+            value = first_child.qualifiers[qualifier]
+            parent_feature.qualifiers[qualifier] = value
+            logging.info(
+                "Promoted %s=%s in child %s to parent %s",
+                qualifier,
+                value,
+                child_id,
+                parent_id,
+            )
+        gffWrite([record], sys.stdout)
+
+
+# Command-line entry point: parse the four positional arguments and pass them
+# to promote_qualifier() as keyword arguments.
+# NOTE(review): the description and help strings below contain typos
+# ("qualifer", "Sepcific") and unclosed parentheses; they are runtime text and
+# are therefore left untouched here.
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description="Promote a child feature's qualifer to the parent feature's qualifier",
+ epilog="",
+ )
+ # The GFF3 input is opened for reading by argparse itself.
+ parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 File")
+ parser.add_argument(
+ "parent",
+ type=str,
+ help="Feature type of the target parent feature (ex: gene, mrna, exon",
+ )
+ parser.add_argument(
+ "child",
+ type=str,
+ help="Feature type of the target child feature (ex: mrna, exon, CDS",
+ )
+ parser.add_argument(
+ "qualifier", help="Sepcific qualifier to promote (ex: Name, product, notes"
+ )
+ args = parser.parse_args()
+ # The argument names match promote_qualifier's parameter names, which is
+ # what allows the parsed namespace to be unpacked as keyword arguments.
+ promote_qualifier(**vars(args))
diff -r 000000000000 -r 6f4c46313117 cpt_promote_qualifiers/promote_qualifier.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_promote_qualifiers/promote_qualifier.xml Fri Jun 17 13:05:11 2022 +0000
@@ -0,0 +1,67 @@
+
+
+ Promote a child feature's qualifier into the parent feature
+
+ macros.xml
+ cpt-macros.xml
+
+
+ $output]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 6f4c46313117 cpt_promote_qualifiers/test-data/promote_qualifiers_in.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_promote_qualifiers/test-data/promote_qualifiers_in.gff3 Fri Jun 17 13:05:11 2022 +0000
@@ -0,0 +1,68 @@
+##gff-version 3
+##sequence-region NC_049846.1 1 49045
+NC_049846.1 . gene 630 1182 . + . owner=benburrowes@tamu.edu;ID=8942304a-f4f8-4429-82b1-2b28f07f8b1e;date_last_modified=2020-09-25;Name=NC_049846.1.gene_1;date_creation=2020-09-25
+NC_049846.1 . mRNA 630 1182 . + . owner=benburrowes@tamu.edu;Parent=8942304a-f4f8-4429-82b1-2b28f07f8b1e;ID=536ddf54-8e6c-4928-99cb-958fbd796706;orig_id=NC_049846.1.gene_1;date_last_modified=2020-09-25;Name=Gene_1_Name;date_creation=2020-09-25
+NC_049846.1 . CDS 643 1182 . + 0 Parent=536ddf54-8e6c-4928-99cb-958fbd796706;ID=536ddf54-8e6c-4928-99cb-958fbd796706-CDS;Name=536ddf54-8e6c-4928-99cb-958fbd796706-CDS
+NC_049846.1 . exon 643 1182 . + . Parent=536ddf54-8e6c-4928-99cb-958fbd796706;ID=ae9ae711-1428-4f4b-b3ea-e6a808a2e56e;Name=ae9ae711-1428-4f4b-b3ea-e6a808a2e56e
+NC_049846.1 . Shine_Dalgarno_sequence 630 634 . + . Parent=536ddf54-8e6c-4928-99cb-958fbd796706;ID=657855ff-d966-4f69-b695-d9e575c9f8e1;Name=657855ff-d966-4f69-b695-d9e575c9f8e1
+###
+NC_049846.1 . gene 1168 1391 . + . owner=benburrowes@tamu.edu;ID=cba0ed94-36c1-4db5-9ddf-66dd9364e918;date_last_modified=2020-09-25;Name=NC_049846.1.gene_2;date_creation=2020-09-25
+NC_049846.1 . mRNA 1168 1391 . + . owner=benburrowes@tamu.edu;Parent=cba0ed94-36c1-4db5-9ddf-66dd9364e918;ID=c146fe26-27b0-4abe-8266-4047c8418fac;orig_id=NC_049846.1.gene_2;date_last_modified=2020-09-25;Name=Gene_2_Name;date_creation=2020-09-25
+NC_049846.1 . CDS 1179 1391 . + 0 Parent=c146fe26-27b0-4abe-8266-4047c8418fac;ID=c146fe26-27b0-4abe-8266-4047c8418fac-CDS;Name=c146fe26-27b0-4abe-8266-4047c8418fac-CDS
+NC_049846.1 . Shine_Dalgarno_sequence 1168 1172 . + . Parent=c146fe26-27b0-4abe-8266-4047c8418fac;ID=46a3ab10-8f23-4720-b2e5-0875dc73a44a;Name=46a3ab10-8f23-4720-b2e5-0875dc73a44a
+NC_049846.1 . exon 1179 1391 . + . Parent=c146fe26-27b0-4abe-8266-4047c8418fac;ID=86d5448a-a20e-4d45-93d3-8b0ecf3dcd9f;Name=86d5448a-a20e-4d45-93d3-8b0ecf3dcd9f
+###
+NC_049846.1 . gene 18540 18935 . + . owner=benburrowes@tamu.edu;ID=007a93d5-5241-4646-9dd3-ec991a73760f;date_last_modified=2020-09-25;Name=NC_049846.1.gene_37.exon;date_creation=2020-09-25
+NC_049846.1 . mRNA 18768 18935 . + . owner=benburrowes@tamu.edu;Parent=007a93d5-5241-4646-9dd3-ec991a73760f;ID=b05601f7-fd9e-4787-8d06-783d970d1aa0;orig_id=NC_049846.1.orf00047.exon;date_last_modified=2020-09-25;Name=Gene_3_Name_2;date_creation=2020-09-25
+NC_049846.1 . CDS 18768 18935 . + 0 Parent=b05601f7-fd9e-4787-8d06-783d970d1aa0;ID=a292e7ae-f32a-42b3-968b-a45b0c459246;Name=a292e7ae-f32a-42b3-968b-a45b0c459246
+NC_049846.1 . exon 18768 18935 . + . Parent=b05601f7-fd9e-4787-8d06-783d970d1aa0;ID=0b26ee78-40a7-4f45-92b4-1bc4615eab57;Name=0b26ee78-40a7-4f45-92b4-1bc4615eab57
+NC_049846.1 . mRNA 18540 18935 . + . owner=benburrowes@tamu.edu;Parent=007a93d5-5241-4646-9dd3-ec991a73760f;ID=4cb5597d-fdfe-4a51-8e5a-477ffefd1815;orig_id=NC_049846.1.gene_37.exon;date_last_modified=2020-09-25;Name=Gene_3_Name_1;date_creation=2020-09-25
+NC_049846.1 . exon 18540 18935 . + . Parent=4cb5597d-fdfe-4a51-8e5a-477ffefd1815;ID=477d89cd-31bc-4dee-b661-b6f1e98a5ae1;Name=477d89cd-31bc-4dee-b661-b6f1e98a5ae1
+NC_049846.1 . CDS 18540 18935 . + 0 Parent=4cb5597d-fdfe-4a51-8e5a-477ffefd1815;ID=4c25daa0-fd45-464f-87f3-8aaefc3b584d;Name=4c25daa0-fd45-464f-87f3-8aaefc3b584d
+###
+NC_049846.1 . gene 28715 28920 . - . owner=benburrowes@tamu.edu;ID=c81f410e-9108-4274-89b3-c52904a05447;date_last_modified=2020-09-25;Name=NC_049846.1.gene_44;date_creation=2020-09-25
+NC_049846.1 . mRNA 28715 28920 . - . owner=benburrowes@tamu.edu;Parent=c81f410e-9108-4274-89b3-c52904a05447;ID=0deef6f0-0bfc-448a-a9f3-be4f50304c8b;orig_id=NC_049846.1.gene_44;date_last_modified=2020-09-25;Name=Gene_4_Name;date_creation=2020-09-25
+NC_049846.1 . Shine_Dalgarno_sequence 28916 28920 . - . Parent=0deef6f0-0bfc-448a-a9f3-be4f50304c8b;ID=7056fcbc-8380-46dc-847a-20b0adfd47bd;Name=7056fcbc-8380-46dc-847a-20b0adfd47bd
+NC_049846.1 . CDS 28715 28909 . - 0 Parent=0deef6f0-0bfc-448a-a9f3-be4f50304c8b;ID=0deef6f0-0bfc-448a-a9f3-be4f50304c8b-CDS;Name=0deef6f0-0bfc-448a-a9f3-be4f50304c8b-CDS
+NC_049846.1 . exon 28715 28909 . - . Parent=0deef6f0-0bfc-448a-a9f3-be4f50304c8b;ID=1172f6fa-ace1-44dd-852d-dda7ac37bd03;Name=1172f6fa-ace1-44dd-852d-dda7ac37bd03
+###
+NC_049846.1 . gene 28924 30078 . - . owner=benburrowes@tamu.edu;ID=8c23e351-f6b4-4142-8874-e4a8f99a0032;date_last_modified=2020-09-25;Name=NC_049846.1.gene_45.exon;date_creation=2020-09-25
+NC_049846.1 . mRNA 28924 29964 . - . owner=benburrowes@tamu.edu;Parent=8c23e351-f6b4-4142-8874-e4a8f99a0032;ID=3e9f4bdb-22df-431d-b213-7773038332ee;orig_id=CDS.0.3610_0.671668761617;date_last_modified=2020-09-25;Name=Gene_5_Name_2;date_creation=2020-09-25
+NC_049846.1 . Shine_Dalgarno_sequence 29962 29964 . - . Parent=3e9f4bdb-22df-431d-b213-7773038332ee;ID=e1b42b07-9dc3-4465-bb7f-247c8a8ed462;Name=e1b42b07-9dc3-4465-bb7f-247c8a8ed462
+NC_049846.1 . exon 28924 29952 . - . Parent=3e9f4bdb-22df-431d-b213-7773038332ee;ID=73c2c908-9ae1-4bd1-9005-7cdd67c08e20;Name=73c2c908-9ae1-4bd1-9005-7cdd67c08e20
+NC_049846.1 . CDS 28924 29952 . - 0 Parent=3e9f4bdb-22df-431d-b213-7773038332ee;ID=3e9f4bdb-22df-431d-b213-7773038332ee-CDS;Name=3e9f4bdb-22df-431d-b213-7773038332ee-CDS
+NC_049846.1 . mRNA 28924 30078 . - . owner=benburrowes@tamu.edu;Parent=8c23e351-f6b4-4142-8874-e4a8f99a0032;ID=ac8fd0a0-115e-4343-8420-15fd17fa8406;orig_id=NC_049846.1.gene_45.exon;date_last_modified=2020-09-25;Name=Gene_5_Name_1;date_creation=2020-09-25
+NC_049846.1 . CDS 28924 30078 . - 0 Parent=ac8fd0a0-115e-4343-8420-15fd17fa8406;ID=1dfd5942-f407-4af5-a7e0-a295a7cad29d;Name=1dfd5942-f407-4af5-a7e0-a295a7cad29d
+NC_049846.1 . exon 28924 30078 . - . Parent=ac8fd0a0-115e-4343-8420-15fd17fa8406;ID=7b6abf1f-4a08-477b-8061-deb9cd0ecede;Name=7b6abf1f-4a08-477b-8061-deb9cd0ecede
+###
+NC_049846.1 . terminator 7806 7827 . . . owner=benburrowes@tamu.edu;ID=06991b32-9e41-4e3c-869a-1be0a85b42df;orig_id=terminator_3;date_last_modified=2020-09-25;Name=terminator_3;date_creation=2020-09-25
+###
+NC_049846.1 . terminator 8296 8318 . . . owner=benburrowes@tamu.edu;ID=6a49da14-a9db-4da2-b614-b9aa1bb0c90c;orig_id=terminator_7;date_last_modified=2020-09-25;Name=terminator_7;date_creation=2020-09-25
+###
+NC_049846.1 . terminator 15779 15799 . . . owner=benburrowes@tamu.edu;ID=ee4a4880-ab4f-43ca-ad46-eff0c3903ca6;orig_id=terminator_16;date_last_modified=2020-09-25;Name=terminator_16;date_creation=2020-09-25
+###
+NC_049846.1 . terminator 15393 15415 . . . owner=benburrowes@tamu.edu;ID=78be0f25-47fb-4c3a-9b4b-037f7506f34e;orig_id=terminator_15;date_last_modified=2020-09-25;Name=terminator_15;date_creation=2020-09-25
+###
+NC_049846.1 . terminator 18225 18253 . . . owner=benburrowes@tamu.edu;ID=41f56835-6f98-4466-8191-150a3b149c23;orig_id=terminator_19;date_last_modified=2020-09-25;Name=terminator_19;date_creation=2020-09-25
+###
+NC_049846.1 . terminator 18936 18955 . . . owner=benburrowes@tamu.edu;ID=4903968a-a6d0-49f1-bb1a-36f96f3ec8cb;orig_id=terminator_21;date_last_modified=2020-09-25;Name=terminator_21;date_creation=2020-09-25
+###
+NC_049846.1 . terminator 22422 22446 . . . owner=benburrowes@tamu.edu;ID=47172a5f-4cba-436b-ae74-eb83161e4f8d;orig_id=terminator_26;date_last_modified=2020-09-25;Name=terminator_26;date_creation=2020-09-25
+###
+NC_049846.1 . terminator 24572 24601 . . . owner=benburrowes@tamu.edu;ID=7b0bf949-3ced-4274-b7db-576794eacd50;orig_id=terminator_27;date_last_modified=2020-09-25;Name=terminator_27;date_creation=2020-09-25
+###
+NC_049846.1 . terminator 28440 28461 . . . owner=benburrowes@tamu.edu;ID=a540bd8e-32cf-464f-a616-7b8912c28799;orig_id=terminator_28;date_last_modified=2020-09-25;Name=terminator_28;date_creation=2020-09-25
+###
+NC_049846.1 . terminator 28446 28469 . . . owner=benburrowes@tamu.edu;ID=0b5d5ed2-7875-43b2-a8a1-90aff8acc6ea;orig_id=terminator_29;date_last_modified=2020-09-25;Name=terminator_29;date_creation=2020-09-25
+###
+NC_049846.1 . terminator 28490 28509 . . . owner=benburrowes@tamu.edu;ID=203ffe4f-a22d-4bdb-a6d1-5aff3f24d071;orig_id=terminator_31;date_last_modified=2020-09-25;Name=terminator_31;date_creation=2020-09-25
+###
+NC_049846.1 . terminator 28494 28521 . . . owner=benburrowes@tamu.edu;ID=078cbc78-de50-49be-96f7-24c15a721158;orig_id=terminator_30;date_last_modified=2020-09-25;Name=terminator_30;date_creation=2020-09-25
+###
+NC_049846.1 . terminator 31531 31555 . . . owner=benburrowes@tamu.edu;ID=9b02ad6d-118d-4d57-88ad-9c1ad1ebae3b;orig_id=terminator_33;date_last_modified=2020-09-25;Name=terminator_33;date_creation=2020-09-25
+###
+NC_049846.1 . terminator 34271 34306 . . . owner=benburrowes@tamu.edu;ID=fe816a64-f0ab-491c-924c-4f96ad3248b7;orig_id=terminator_36;date_last_modified=2020-09-25;Name=terminator_36;date_creation=2020-09-25
+###
+NC_049846.1 . terminator 42539 42569 . . . owner=benburrowes@tamu.edu;ID=9d4c1e44-3f5e-4b9c-8be0-f71c505bf717;orig_id=terminator_44;date_last_modified=2020-09-25;Name=terminator_44;date_creation=2020-09-25
+###
+NC_049846.1 . terminator 43744 43765 . . . owner=benburrowes@tamu.edu;ID=743fb65b-4c28-4ff3-b39b-4de65e324304;orig_id=terminator_46;date_last_modified=2020-09-25;Name=terminator_46;date_creation=2020-09-25
diff -r 000000000000 -r 6f4c46313117 cpt_promote_qualifiers/test-data/promote_qualifiers_out.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_promote_qualifiers/test-data/promote_qualifiers_out.gff3 Fri Jun 17 13:05:11 2022 +0000
@@ -0,0 +1,48 @@
+##gff-version 3
+##sequence-region NC_049846.1 1 49045
+NC_049846.1 . gene 630 1182 . + . owner=benburrowes@tamu.edu;ID=8942304a-f4f8-4429-82b1-2b28f07f8b1e;date_last_modified=2020-09-25;Name=Gene_1_Name;date_creation=2020-09-25;
+NC_049846.1 . mRNA 630 1182 . + . owner=benburrowes@tamu.edu;Parent=8942304a-f4f8-4429-82b1-2b28f07f8b1e;ID=536ddf54-8e6c-4928-99cb-958fbd796706;orig_id=NC_049846.1.gene_1;date_last_modified=2020-09-25;Name=Gene_1_Name;date_creation=2020-09-25;
+NC_049846.1 . CDS 643 1182 . + 0 Parent=536ddf54-8e6c-4928-99cb-958fbd796706;ID=536ddf54-8e6c-4928-99cb-958fbd796706-CDS;Name=536ddf54-8e6c-4928-99cb-958fbd796706-CDS;
+NC_049846.1 . exon 643 1182 . + . Parent=536ddf54-8e6c-4928-99cb-958fbd796706;ID=ae9ae711-1428-4f4b-b3ea-e6a808a2e56e;Name=ae9ae711-1428-4f4b-b3ea-e6a808a2e56e;
+NC_049846.1 . Shine_Dalgarno_sequence 630 634 . + . Parent=536ddf54-8e6c-4928-99cb-958fbd796706;ID=657855ff-d966-4f69-b695-d9e575c9f8e1;Name=657855ff-d966-4f69-b695-d9e575c9f8e1;
+NC_049846.1 . gene 1168 1391 . + . owner=benburrowes@tamu.edu;ID=cba0ed94-36c1-4db5-9ddf-66dd9364e918;date_last_modified=2020-09-25;Name=Gene_2_Name;date_creation=2020-09-25;
+NC_049846.1 . mRNA 1168 1391 . + . owner=benburrowes@tamu.edu;Parent=cba0ed94-36c1-4db5-9ddf-66dd9364e918;ID=c146fe26-27b0-4abe-8266-4047c8418fac;orig_id=NC_049846.1.gene_2;date_last_modified=2020-09-25;Name=Gene_2_Name;date_creation=2020-09-25;
+NC_049846.1 . CDS 1179 1391 . + 0 Parent=c146fe26-27b0-4abe-8266-4047c8418fac;ID=c146fe26-27b0-4abe-8266-4047c8418fac-CDS;Name=c146fe26-27b0-4abe-8266-4047c8418fac-CDS;
+NC_049846.1 . Shine_Dalgarno_sequence 1168 1172 . + . Parent=c146fe26-27b0-4abe-8266-4047c8418fac;ID=46a3ab10-8f23-4720-b2e5-0875dc73a44a;Name=46a3ab10-8f23-4720-b2e5-0875dc73a44a;
+NC_049846.1 . exon 1179 1391 . + . Parent=c146fe26-27b0-4abe-8266-4047c8418fac;ID=86d5448a-a20e-4d45-93d3-8b0ecf3dcd9f;Name=86d5448a-a20e-4d45-93d3-8b0ecf3dcd9f;
+NC_049846.1 . gene 18540 18935 . + . owner=benburrowes@tamu.edu;ID=007a93d5-5241-4646-9dd3-ec991a73760f;date_last_modified=2020-09-25;Name=Gene_3_Name_1;date_creation=2020-09-25;
+NC_049846.1 . mRNA 18768 18935 . + . owner=benburrowes@tamu.edu;Parent=007a93d5-5241-4646-9dd3-ec991a73760f;ID=b05601f7-fd9e-4787-8d06-783d970d1aa0;orig_id=NC_049846.1.orf00047.exon;date_last_modified=2020-09-25;Name=Gene_3_Name_2;date_creation=2020-09-25;
+NC_049846.1 . CDS 18768 18935 . + 0 Parent=b05601f7-fd9e-4787-8d06-783d970d1aa0;ID=a292e7ae-f32a-42b3-968b-a45b0c459246;Name=a292e7ae-f32a-42b3-968b-a45b0c459246;
+NC_049846.1 . exon 18768 18935 . + . Parent=b05601f7-fd9e-4787-8d06-783d970d1aa0;ID=0b26ee78-40a7-4f45-92b4-1bc4615eab57;Name=0b26ee78-40a7-4f45-92b4-1bc4615eab57;
+NC_049846.1 . mRNA 18540 18935 . + . owner=benburrowes@tamu.edu;Parent=007a93d5-5241-4646-9dd3-ec991a73760f;ID=4cb5597d-fdfe-4a51-8e5a-477ffefd1815;orig_id=NC_049846.1.gene_37.exon;date_last_modified=2020-09-25;Name=Gene_3_Name_1;date_creation=2020-09-25;
+NC_049846.1 . exon 18540 18935 . + . Parent=4cb5597d-fdfe-4a51-8e5a-477ffefd1815;ID=477d89cd-31bc-4dee-b661-b6f1e98a5ae1;Name=477d89cd-31bc-4dee-b661-b6f1e98a5ae1;
+NC_049846.1 . CDS 18540 18935 . + 0 Parent=4cb5597d-fdfe-4a51-8e5a-477ffefd1815;ID=4c25daa0-fd45-464f-87f3-8aaefc3b584d;Name=4c25daa0-fd45-464f-87f3-8aaefc3b584d;
+NC_049846.1 . gene 28715 28920 . - . owner=benburrowes@tamu.edu;ID=c81f410e-9108-4274-89b3-c52904a05447;date_last_modified=2020-09-25;Name=Gene_4_Name;date_creation=2020-09-25;
+NC_049846.1 . mRNA 28715 28920 . - . owner=benburrowes@tamu.edu;Parent=c81f410e-9108-4274-89b3-c52904a05447;ID=0deef6f0-0bfc-448a-a9f3-be4f50304c8b;orig_id=NC_049846.1.gene_44;date_last_modified=2020-09-25;Name=Gene_4_Name;date_creation=2020-09-25;
+NC_049846.1 . Shine_Dalgarno_sequence 28916 28920 . - . Parent=0deef6f0-0bfc-448a-a9f3-be4f50304c8b;ID=7056fcbc-8380-46dc-847a-20b0adfd47bd;Name=7056fcbc-8380-46dc-847a-20b0adfd47bd;
+NC_049846.1 . CDS 28715 28909 . - 0 Parent=0deef6f0-0bfc-448a-a9f3-be4f50304c8b;ID=0deef6f0-0bfc-448a-a9f3-be4f50304c8b-CDS;Name=0deef6f0-0bfc-448a-a9f3-be4f50304c8b-CDS;
+NC_049846.1 . exon 28715 28909 . - . Parent=0deef6f0-0bfc-448a-a9f3-be4f50304c8b;ID=1172f6fa-ace1-44dd-852d-dda7ac37bd03;Name=1172f6fa-ace1-44dd-852d-dda7ac37bd03;
+NC_049846.1 . gene 28924 30078 . - . owner=benburrowes@tamu.edu;ID=8c23e351-f6b4-4142-8874-e4a8f99a0032;date_last_modified=2020-09-25;Name=Gene_5_Name_1;date_creation=2020-09-25;
+NC_049846.1 . mRNA 28924 29964 . - . owner=benburrowes@tamu.edu;Parent=8c23e351-f6b4-4142-8874-e4a8f99a0032;ID=3e9f4bdb-22df-431d-b213-7773038332ee;orig_id=CDS.0.3610_0.671668761617;date_last_modified=2020-09-25;Name=Gene_5_Name_2;date_creation=2020-09-25;
+NC_049846.1 . Shine_Dalgarno_sequence 29962 29964 . - . Parent=3e9f4bdb-22df-431d-b213-7773038332ee;ID=e1b42b07-9dc3-4465-bb7f-247c8a8ed462;Name=e1b42b07-9dc3-4465-bb7f-247c8a8ed462;
+NC_049846.1 . exon 28924 29952 . - . Parent=3e9f4bdb-22df-431d-b213-7773038332ee;ID=73c2c908-9ae1-4bd1-9005-7cdd67c08e20;Name=73c2c908-9ae1-4bd1-9005-7cdd67c08e20;
+NC_049846.1 . CDS 28924 29952 . - 0 Parent=3e9f4bdb-22df-431d-b213-7773038332ee;ID=3e9f4bdb-22df-431d-b213-7773038332ee-CDS;Name=3e9f4bdb-22df-431d-b213-7773038332ee-CDS;
+NC_049846.1 . mRNA 28924 30078 . - . owner=benburrowes@tamu.edu;Parent=8c23e351-f6b4-4142-8874-e4a8f99a0032;ID=ac8fd0a0-115e-4343-8420-15fd17fa8406;orig_id=NC_049846.1.gene_45.exon;date_last_modified=2020-09-25;Name=Gene_5_Name_1;date_creation=2020-09-25;
+NC_049846.1 . CDS 28924 30078 . - 0 Parent=ac8fd0a0-115e-4343-8420-15fd17fa8406;ID=1dfd5942-f407-4af5-a7e0-a295a7cad29d;Name=1dfd5942-f407-4af5-a7e0-a295a7cad29d;
+NC_049846.1 . exon 28924 30078 . - . Parent=ac8fd0a0-115e-4343-8420-15fd17fa8406;ID=7b6abf1f-4a08-477b-8061-deb9cd0ecede;Name=7b6abf1f-4a08-477b-8061-deb9cd0ecede;
+NC_049846.1 . terminator 7806 7827 . . . owner=benburrowes@tamu.edu;ID=06991b32-9e41-4e3c-869a-1be0a85b42df;orig_id=terminator_3;date_last_modified=2020-09-25;Name=terminator_3;date_creation=2020-09-25;
+NC_049846.1 . terminator 8296 8318 . . . owner=benburrowes@tamu.edu;ID=6a49da14-a9db-4da2-b614-b9aa1bb0c90c;orig_id=terminator_7;date_last_modified=2020-09-25;Name=terminator_7;date_creation=2020-09-25;
+NC_049846.1 . terminator 15779 15799 . . . owner=benburrowes@tamu.edu;ID=ee4a4880-ab4f-43ca-ad46-eff0c3903ca6;orig_id=terminator_16;date_last_modified=2020-09-25;Name=terminator_16;date_creation=2020-09-25;
+NC_049846.1 . terminator 15393 15415 . . . owner=benburrowes@tamu.edu;ID=78be0f25-47fb-4c3a-9b4b-037f7506f34e;orig_id=terminator_15;date_last_modified=2020-09-25;Name=terminator_15;date_creation=2020-09-25;
+NC_049846.1 . terminator 18225 18253 . . . owner=benburrowes@tamu.edu;ID=41f56835-6f98-4466-8191-150a3b149c23;orig_id=terminator_19;date_last_modified=2020-09-25;Name=terminator_19;date_creation=2020-09-25;
+NC_049846.1 . terminator 18936 18955 . . . owner=benburrowes@tamu.edu;ID=4903968a-a6d0-49f1-bb1a-36f96f3ec8cb;orig_id=terminator_21;date_last_modified=2020-09-25;Name=terminator_21;date_creation=2020-09-25;
+NC_049846.1 . terminator 22422 22446 . . . owner=benburrowes@tamu.edu;ID=47172a5f-4cba-436b-ae74-eb83161e4f8d;orig_id=terminator_26;date_last_modified=2020-09-25;Name=terminator_26;date_creation=2020-09-25;
+NC_049846.1 . terminator 24572 24601 . . . owner=benburrowes@tamu.edu;ID=7b0bf949-3ced-4274-b7db-576794eacd50;orig_id=terminator_27;date_last_modified=2020-09-25;Name=terminator_27;date_creation=2020-09-25;
+NC_049846.1 . terminator 28440 28461 . . . owner=benburrowes@tamu.edu;ID=a540bd8e-32cf-464f-a616-7b8912c28799;orig_id=terminator_28;date_last_modified=2020-09-25;Name=terminator_28;date_creation=2020-09-25;
+NC_049846.1 . terminator 28446 28469 . . . owner=benburrowes@tamu.edu;ID=0b5d5ed2-7875-43b2-a8a1-90aff8acc6ea;orig_id=terminator_29;date_last_modified=2020-09-25;Name=terminator_29;date_creation=2020-09-25;
+NC_049846.1 . terminator 28490 28509 . . . owner=benburrowes@tamu.edu;ID=203ffe4f-a22d-4bdb-a6d1-5aff3f24d071;orig_id=terminator_31;date_last_modified=2020-09-25;Name=terminator_31;date_creation=2020-09-25;
+NC_049846.1 . terminator 28494 28521 . . . owner=benburrowes@tamu.edu;ID=078cbc78-de50-49be-96f7-24c15a721158;orig_id=terminator_30;date_last_modified=2020-09-25;Name=terminator_30;date_creation=2020-09-25;
+NC_049846.1 . terminator 31531 31555 . . . owner=benburrowes@tamu.edu;ID=9b02ad6d-118d-4d57-88ad-9c1ad1ebae3b;orig_id=terminator_33;date_last_modified=2020-09-25;Name=terminator_33;date_creation=2020-09-25;
+NC_049846.1 . terminator 34271 34306 . . . owner=benburrowes@tamu.edu;ID=fe816a64-f0ab-491c-924c-4f96ad3248b7;orig_id=terminator_36;date_last_modified=2020-09-25;Name=terminator_36;date_creation=2020-09-25;
+NC_049846.1 . terminator 42539 42569 . . . owner=benburrowes@tamu.edu;ID=9d4c1e44-3f5e-4b9c-8be0-f71c505bf717;orig_id=terminator_44;date_last_modified=2020-09-25;Name=terminator_44;date_creation=2020-09-25;
+NC_049846.1 . terminator 43744 43765 . . . owner=benburrowes@tamu.edu;ID=743fb65b-4c28-4ff3-b39b-4de65e324304;orig_id=terminator_46;date_last_modified=2020-09-25;Name=terminator_46;date_creation=2020-09-25;