# HG changeset patch # User cpt # Date 1655471111 0 # Node ID 6f4c463131172d9ed33e485a5cb80394fb32d2c5 Uploaded diff -r 000000000000 -r 6f4c46313117 cpt_promote_qualifiers/cpt-macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_promote_qualifiers/cpt-macros.xml Fri Jun 17 13:05:11 2022 +0000 @@ -0,0 +1,115 @@ + + + + + python + biopython + requests + + + + + + + + 10.1371/journal.pcbi.1008214 + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {C. Ross}, + title = {CPT Galaxy Tools}, + year = {2020-}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + @unpublished{galaxyTools, + author = {A. Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {A. Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + diff -r 000000000000 -r 6f4c46313117 cpt_promote_qualifiers/gff3.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_promote_qualifiers/gff3.py Fri Jun 17 13:05:11 2022 +0000 @@ -0,0 +1,346 @@ +import copy +import logging + +log = logging.getLogger() +log.setLevel(logging.WARN) + + +def feature_lambda( + feature_list, + test, + test_kwargs, + subfeatures=True, + parent=None, + invert=False, + recurse=True, +): + """Recursively search through features, testing each with a test function, yielding matches. + + GFF3 is a hierachical data structure, so we need to be able to recursively + search through features. E.g. if you're looking for a feature with + ID='bob.42', you can't just do a simple list comprehension with a test + case. You don't know how deeply burried bob.42 will be in the feature tree. This is where feature_lambda steps in. + + :type feature_list: list + :param feature_list: an iterable of features + + :type test: function reference + :param test: a closure with the method signature (feature, **kwargs) where + the kwargs are those passed in the next argument. This + function should return True or False, True if the feature is + to be yielded as part of the main feature_lambda function, or + False if it is to be ignored. This function CAN mutate the + features passed to it (think "apply"). + + :type test_kwargs: dictionary + :param test_kwargs: kwargs to pass to your closure when it is called. + + :type subfeatures: boolean + :param subfeatures: when a feature is matched, should just that feature be + yielded to the caller, or should the entire sub_feature + tree for that feature be included? subfeatures=True is + useful in cases such as searching for a gene feature, + and wanting to know what RBS/Shine_Dalgarno_sequences + are in the sub_feature tree (which can be accomplished + with two feature_lambda calls). subfeatures=False is + useful in cases when you want to process (and possibly + return) the entire feature tree, such as applying a + qualifier to every single feature. + + :type invert: boolean + :param invert: Negate/invert the result of the filter. + + :rtype: yielded list + :return: Yields a list of matching features. + """ + # Either the top level set of [features] or the subfeature attribute + for feature in feature_list: + feature._parent = parent + if not parent: + # Set to self so we cannot go above root. + feature._parent = feature + test_result = test(feature, **test_kwargs) + # if (not invert and test_result) or (invert and not test_result): + if invert ^ test_result: + if not subfeatures: + feature_copy = copy.deepcopy(feature) + feature_copy.sub_features = list() + yield feature_copy + else: + yield feature + + if recurse and hasattr(feature, "sub_features"): + for x in feature_lambda( + feature.sub_features, + test, + test_kwargs, + subfeatures=subfeatures, + parent=feature, + invert=invert, + recurse=recurse, + ): + yield x + + +def fetchParent(feature): + if not hasattr(feature, "_parent") or feature._parent is None: + return feature + else: + return fetchParent(feature._parent) + + +def feature_test_true(feature, **kwargs): + return True + + +def feature_test_type(feature, **kwargs): + if "type" in kwargs: + return str(feature.type).upper() == str(kwargs["type"]).upper() + elif "types" in kwargs: + for x in kwargs["types"]: + if str(feature.type).upper() == str(x).upper(): + return True + return False + raise Exception("Incorrect feature_test_type call, need type or types") + + +def feature_test_qual_value(feature, **kwargs): + """Test qualifier values. + + For every feature, check that at least one value in + feature.quailfiers(kwargs['qualifier']) is in kwargs['attribute_list'] + """ + if isinstance(kwargs["qualifier"], list): + for qualifier in kwargs["qualifier"]: + for attribute_value in feature.qualifiers.get(qualifier, []): + if attribute_value in kwargs["attribute_list"]: + return True + else: + for attribute_value in feature.qualifiers.get(kwargs["qualifier"], []): + if attribute_value in kwargs["attribute_list"]: + return True + return False + + +def feature_test_location(feature, **kwargs): + if "strand" in kwargs: + if feature.location.strand != kwargs["strand"]: + return False + + return feature.location.start <= kwargs["loc"] <= feature.location.end + + +def feature_test_quals(feature, **kwargs): + """ + Example:: + + a = Feature(qualifiers={'Note': ['Some notes', 'Aasdf']}) + + # Check if a contains a Note + feature_test_quals(a, {'Note': None}) # Returns True + feature_test_quals(a, {'Product': None}) # Returns False + + # Check if a contains a note with specific value + feature_test_quals(a, {'Note': ['ome']}) # Returns True + + # Check if a contains a note with specific value + feature_test_quals(a, {'Note': ['other']}) # Returns False + """ + for key in kwargs: + if key not in feature.qualifiers: + return False + + # Key is present, no value specified + if kwargs[key] is None: + return True + + # Otherwise there is a key value we're looking for. + # so we make a list of matches + matches = [] + # And check all of the feature qualifier valuse + for value in feature.qualifiers[key]: + # For that kwargs[key] value + for x in kwargs[key]: + matches.append(x in value) + + # If none matched, then we return false. + if not any(matches): + return False + + return True + + +def feature_test_contains(feature, **kwargs): + if "index" in kwargs: + return feature.location.start < kwargs["index"] < feature.location.end + elif "range" in kwargs: + return ( + feature.location.start < kwargs["range"]["start"] < feature.location.end + and feature.location.start < kwargs["range"]["end"] < feature.location.end + ) + else: + raise RuntimeError("Must use index or range keyword") + + +def get_id(feature=None, parent_prefix=None): + result = "" + if parent_prefix is not None: + result += parent_prefix + "|" + if "locus_tag" in feature.qualifiers: + result += feature.qualifiers["locus_tag"][0] + elif "gene" in feature.qualifiers: + result += feature.qualifiers["gene"][0] + elif "Gene" in feature.qualifiers: + result += feature.qualifiers["Gene"][0] + elif "product" in feature.qualifiers: + result += feature.qualifiers["product"][0] + elif "Product" in feature.qualifiers: + result += feature.qualifiers["Product"][0] + elif "Name" in feature.qualifiers: + result += feature.qualifiers["Name"][0] + else: + return feature.id + # Leaving in case bad things happen. + # result += '%s_%s_%s_%s' % ( + # feature.id, + # feature.location.start, + # feature.location.end, + # feature.location.strand + # ) + return result + + +def get_gff3_id(gene): + return gene.qualifiers.get("Name", [gene.id])[0] + + +def ensure_location_in_bounds(start=0, end=0, parent_length=0): + # This prevents frameshift errors + while start < 0: + start += 3 + while end < 0: + end += 3 + while start > parent_length: + start -= 3 + while end > parent_length: + end -= 3 + return (start, end) + + +def coding_genes(feature_list): + for x in genes(feature_list): + if ( + len( + list( + feature_lambda( + x.sub_features, + feature_test_type, + {"type": "CDS"}, + subfeatures=False, + ) + ) + ) + > 0 + ): + yield x + + +def genes(feature_list, feature_type="gene", sort=False): + """ + Simple filter to extract gene features from the feature set. + """ + + if not sort: + for x in feature_lambda( + feature_list, feature_test_type, {"type": feature_type}, subfeatures=True + ): + yield x + else: + data = list(genes(feature_list, feature_type=feature_type, sort=False)) + data = sorted(data, key=lambda feature: feature.location.start) + for x in data: + yield x + + +def wa_unified_product_name(feature): + """ + Try and figure out a name. We gave conflicting instructions, so + this isn't as trivial as it should be. Sometimes it will be in + 'product' or 'Product', othertimes in 'Name' + """ + # Manually applied tags. + protein_product = feature.qualifiers.get( + "product", feature.qualifiers.get("Product", [None]) + )[0] + + # If neither of those are available ... + if protein_product is None: + # And there's a name... + if "Name" in feature.qualifiers: + if not is_uuid(feature.qualifiers["Name"][0]): + protein_product = feature.qualifiers["Name"][0] + + return protein_product + + +def is_uuid(name): + return name.count("-") == 4 and len(name) == 36 + + +def get_rbs_from(gene): + # Normal RBS annotation types + rbs_rbs = list( + feature_lambda( + gene.sub_features, feature_test_type, {"type": "RBS"}, subfeatures=False + ) + ) + rbs_sds = list( + feature_lambda( + gene.sub_features, + feature_test_type, + {"type": "Shine_Dalgarno_sequence"}, + subfeatures=False, + ) + ) + # Fraking apollo + apollo_exons = list( + feature_lambda( + gene.sub_features, feature_test_type, {"type": "exon"}, subfeatures=False + ) + ) + apollo_exons = [x for x in apollo_exons if len(x) < 10] + # These are more NCBI's style + regulatory_elements = list( + feature_lambda( + gene.sub_features, + feature_test_type, + {"type": "regulatory"}, + subfeatures=False, + ) + ) + rbs_regulatory = list( + feature_lambda( + regulatory_elements, + feature_test_quals, + {"regulatory_class": ["ribosome_binding_site"]}, + subfeatures=False, + ) + ) + # Here's hoping you find just one ;) + return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons + + +def nice_name(record): + """ + get the real name rather than NCBI IDs and so on. If fails, will return record.id + """ + name = record.id + likely_parental_contig = list(genes(record.features, feature_type="contig")) + if len(likely_parental_contig) == 1: + name = likely_parental_contig[0].qualifiers.get("organism", [name])[0] + return name + + +def fsort(it): + for i in sorted(it, key=lambda x: int(x.location.start)): + yield i diff -r 000000000000 -r 6f4c46313117 cpt_promote_qualifiers/macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_promote_qualifiers/macros.xml Fri Jun 17 13:05:11 2022 +0000 @@ -0,0 +1,85 @@ + + + + + python + biopython + cpt_gffparser + + + + + "$blast_tsv" + + + + + + + "$blast_xml" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + "$gff3_data" + + +#if str($reference_genome.reference_genome_source) == 'cached': + "${reference_genome.fasta_indexes.fields.path}" +#else if str($reference_genome.reference_genome_source) == 'history': + genomeref.fa +#end if + + +#if $reference_genome.reference_genome_source == 'history': + ln -s $reference_genome.genome_fasta genomeref.fa; +#end if + + +#if str($reference_genome.reference_genome_source) == 'cached': + "${reference_genome.fasta_indexes.fields.path}" +#else if str($reference_genome.reference_genome_source) == 'history': + genomeref.fa +#end if + + + + + + + "$sequences" + + + + + diff -r 000000000000 -r 6f4c46313117 cpt_promote_qualifiers/promote_qualifier.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_promote_qualifiers/promote_qualifier.py Fri Jun 17 13:05:11 2022 +0000 @@ -0,0 +1,75 @@ +#!/usr/bin/env python +import argparse +import sys +import logging +from CPT_GFFParser import gffParse, gffWrite +from gff3 import feature_lambda, feature_test_type + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger(__name__) + + +def promote_qualifier(qualifier, parent, child, gff3): + for record in gffParse(gff3): + for parent_feature in feature_lambda( + record.features, feature_test_type, {"type": parent}, subfeatures=True + ): + # for each feature of the parent type, get the first subfeature of the child type + try: + first_child = sorted( + list( + feature_lambda( + parent_feature.sub_features, + feature_test_type, + {"type": child}, + subfeatures=False, + ) + ), + key=lambda x: x.location.start + if parent_feature.strand > 0 + else x.location.end, + reverse=False if parent_feature.strand > 0 else True, + )[0] + except IndexError: + logging.warning("Child type %s not found under parent %s" % (child, parent_feature.qualifiers["ID"])) + continue + try: + parent_feature.qualifiers[qualifier] = first_child.qualifiers[qualifier] + logging.info( + "Promoted %s=%s in child %s to parent %s" + % ( + qualifier, + first_child.qualifiers[qualifier], + first_child.qualifiers["ID"], + parent_feature.qualifiers["ID"], + ) + ) + except KeyError: + logging.warning( + "Qualifier %s not found in child feature %s" + % (qualifier, first_child.qualifiers["ID"]) + ) + gffWrite([record], sys.stdout) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Promote a child feature's qualifer to the parent feature's qualifier", + epilog="", + ) + parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 File") + parser.add_argument( + "parent", + type=str, + help="Feature type of the target parent feature (ex: gene, mrna, exon", + ) + parser.add_argument( + "child", + type=str, + help="Feature type of the target child feature (ex: mrna, exon, CDS", + ) + parser.add_argument( + "qualifier", help="Sepcific qualifier to promote (ex: Name, product, notes" + ) + args = parser.parse_args() + promote_qualifier(**vars(args)) diff -r 000000000000 -r 6f4c46313117 cpt_promote_qualifiers/promote_qualifier.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_promote_qualifiers/promote_qualifier.xml Fri Jun 17 13:05:11 2022 +0000 @@ -0,0 +1,67 @@ + + + Promote a child feature's qualifier into the parent feature + + macros.xml + cpt-macros.xml + + + $output]]> + + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 6f4c46313117 cpt_promote_qualifiers/test-data/promote_qualifiers_in.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_promote_qualifiers/test-data/promote_qualifiers_in.gff3 Fri Jun 17 13:05:11 2022 +0000 @@ -0,0 +1,68 @@ +##gff-version 3 +##sequence-region NC_049846.1 1 49045 +NC_049846.1 . gene 630 1182 . + . owner=benburrowes@tamu.edu;ID=8942304a-f4f8-4429-82b1-2b28f07f8b1e;date_last_modified=2020-09-25;Name=NC_049846.1.gene_1;date_creation=2020-09-25 +NC_049846.1 . mRNA 630 1182 . + . owner=benburrowes@tamu.edu;Parent=8942304a-f4f8-4429-82b1-2b28f07f8b1e;ID=536ddf54-8e6c-4928-99cb-958fbd796706;orig_id=NC_049846.1.gene_1;date_last_modified=2020-09-25;Name=Gene_1_Name;date_creation=2020-09-25 +NC_049846.1 . CDS 643 1182 . + 0 Parent=536ddf54-8e6c-4928-99cb-958fbd796706;ID=536ddf54-8e6c-4928-99cb-958fbd796706-CDS;Name=536ddf54-8e6c-4928-99cb-958fbd796706-CDS +NC_049846.1 . exon 643 1182 . + . Parent=536ddf54-8e6c-4928-99cb-958fbd796706;ID=ae9ae711-1428-4f4b-b3ea-e6a808a2e56e;Name=ae9ae711-1428-4f4b-b3ea-e6a808a2e56e +NC_049846.1 . Shine_Dalgarno_sequence 630 634 . + . Parent=536ddf54-8e6c-4928-99cb-958fbd796706;ID=657855ff-d966-4f69-b695-d9e575c9f8e1;Name=657855ff-d966-4f69-b695-d9e575c9f8e1 +### +NC_049846.1 . gene 1168 1391 . + . owner=benburrowes@tamu.edu;ID=cba0ed94-36c1-4db5-9ddf-66dd9364e918;date_last_modified=2020-09-25;Name=NC_049846.1.gene_2;date_creation=2020-09-25 +NC_049846.1 . mRNA 1168 1391 . + . owner=benburrowes@tamu.edu;Parent=cba0ed94-36c1-4db5-9ddf-66dd9364e918;ID=c146fe26-27b0-4abe-8266-4047c8418fac;orig_id=NC_049846.1.gene_2;date_last_modified=2020-09-25;Name=Gene_2_Name;date_creation=2020-09-25 +NC_049846.1 . CDS 1179 1391 . + 0 Parent=c146fe26-27b0-4abe-8266-4047c8418fac;ID=c146fe26-27b0-4abe-8266-4047c8418fac-CDS;Name=c146fe26-27b0-4abe-8266-4047c8418fac-CDS +NC_049846.1 . Shine_Dalgarno_sequence 1168 1172 . + . Parent=c146fe26-27b0-4abe-8266-4047c8418fac;ID=46a3ab10-8f23-4720-b2e5-0875dc73a44a;Name=46a3ab10-8f23-4720-b2e5-0875dc73a44a +NC_049846.1 . exon 1179 1391 . + . Parent=c146fe26-27b0-4abe-8266-4047c8418fac;ID=86d5448a-a20e-4d45-93d3-8b0ecf3dcd9f;Name=86d5448a-a20e-4d45-93d3-8b0ecf3dcd9f +### +NC_049846.1 . gene 18540 18935 . + . owner=benburrowes@tamu.edu;ID=007a93d5-5241-4646-9dd3-ec991a73760f;date_last_modified=2020-09-25;Name=NC_049846.1.gene_37.exon;date_creation=2020-09-25 +NC_049846.1 . mRNA 18768 18935 . + . owner=benburrowes@tamu.edu;Parent=007a93d5-5241-4646-9dd3-ec991a73760f;ID=b05601f7-fd9e-4787-8d06-783d970d1aa0;orig_id=NC_049846.1.orf00047.exon;date_last_modified=2020-09-25;Name=Gene_3_Name_2;date_creation=2020-09-25 +NC_049846.1 . CDS 18768 18935 . + 0 Parent=b05601f7-fd9e-4787-8d06-783d970d1aa0;ID=a292e7ae-f32a-42b3-968b-a45b0c459246;Name=a292e7ae-f32a-42b3-968b-a45b0c459246 +NC_049846.1 . exon 18768 18935 . + . Parent=b05601f7-fd9e-4787-8d06-783d970d1aa0;ID=0b26ee78-40a7-4f45-92b4-1bc4615eab57;Name=0b26ee78-40a7-4f45-92b4-1bc4615eab57 +NC_049846.1 . mRNA 18540 18935 . + . owner=benburrowes@tamu.edu;Parent=007a93d5-5241-4646-9dd3-ec991a73760f;ID=4cb5597d-fdfe-4a51-8e5a-477ffefd1815;orig_id=NC_049846.1.gene_37.exon;date_last_modified=2020-09-25;Name=Gene_3_Name_1;date_creation=2020-09-25 +NC_049846.1 . exon 18540 18935 . + . Parent=4cb5597d-fdfe-4a51-8e5a-477ffefd1815;ID=477d89cd-31bc-4dee-b661-b6f1e98a5ae1;Name=477d89cd-31bc-4dee-b661-b6f1e98a5ae1 +NC_049846.1 . CDS 18540 18935 . + 0 Parent=4cb5597d-fdfe-4a51-8e5a-477ffefd1815;ID=4c25daa0-fd45-464f-87f3-8aaefc3b584d;Name=4c25daa0-fd45-464f-87f3-8aaefc3b584d +### +NC_049846.1 . gene 28715 28920 . - . owner=benburrowes@tamu.edu;ID=c81f410e-9108-4274-89b3-c52904a05447;date_last_modified=2020-09-25;Name=NC_049846.1.gene_44;date_creation=2020-09-25 +NC_049846.1 . mRNA 28715 28920 . - . owner=benburrowes@tamu.edu;Parent=c81f410e-9108-4274-89b3-c52904a05447;ID=0deef6f0-0bfc-448a-a9f3-be4f50304c8b;orig_id=NC_049846.1.gene_44;date_last_modified=2020-09-25;Name=Gene_4_Name;date_creation=2020-09-25 +NC_049846.1 . Shine_Dalgarno_sequence 28916 28920 . - . Parent=0deef6f0-0bfc-448a-a9f3-be4f50304c8b;ID=7056fcbc-8380-46dc-847a-20b0adfd47bd;Name=7056fcbc-8380-46dc-847a-20b0adfd47bd +NC_049846.1 . CDS 28715 28909 . - 0 Parent=0deef6f0-0bfc-448a-a9f3-be4f50304c8b;ID=0deef6f0-0bfc-448a-a9f3-be4f50304c8b-CDS;Name=0deef6f0-0bfc-448a-a9f3-be4f50304c8b-CDS +NC_049846.1 . exon 28715 28909 . - . Parent=0deef6f0-0bfc-448a-a9f3-be4f50304c8b;ID=1172f6fa-ace1-44dd-852d-dda7ac37bd03;Name=1172f6fa-ace1-44dd-852d-dda7ac37bd03 +### +NC_049846.1 . gene 28924 30078 . - . owner=benburrowes@tamu.edu;ID=8c23e351-f6b4-4142-8874-e4a8f99a0032;date_last_modified=2020-09-25;Name=NC_049846.1.gene_45.exon;date_creation=2020-09-25 +NC_049846.1 . mRNA 28924 29964 . - . owner=benburrowes@tamu.edu;Parent=8c23e351-f6b4-4142-8874-e4a8f99a0032;ID=3e9f4bdb-22df-431d-b213-7773038332ee;orig_id=CDS.0.3610_0.671668761617;date_last_modified=2020-09-25;Name=Gene_5_Name_2;date_creation=2020-09-25 +NC_049846.1 . Shine_Dalgarno_sequence 29962 29964 . - . Parent=3e9f4bdb-22df-431d-b213-7773038332ee;ID=e1b42b07-9dc3-4465-bb7f-247c8a8ed462;Name=e1b42b07-9dc3-4465-bb7f-247c8a8ed462 +NC_049846.1 . exon 28924 29952 . - . Parent=3e9f4bdb-22df-431d-b213-7773038332ee;ID=73c2c908-9ae1-4bd1-9005-7cdd67c08e20;Name=73c2c908-9ae1-4bd1-9005-7cdd67c08e20 +NC_049846.1 . CDS 28924 29952 . - 0 Parent=3e9f4bdb-22df-431d-b213-7773038332ee;ID=3e9f4bdb-22df-431d-b213-7773038332ee-CDS;Name=3e9f4bdb-22df-431d-b213-7773038332ee-CDS +NC_049846.1 . mRNA 28924 30078 . - . owner=benburrowes@tamu.edu;Parent=8c23e351-f6b4-4142-8874-e4a8f99a0032;ID=ac8fd0a0-115e-4343-8420-15fd17fa8406;orig_id=NC_049846.1.gene_45.exon;date_last_modified=2020-09-25;Name=Gene_5_Name_1;date_creation=2020-09-25 +NC_049846.1 . CDS 28924 30078 . - 0 Parent=ac8fd0a0-115e-4343-8420-15fd17fa8406;ID=1dfd5942-f407-4af5-a7e0-a295a7cad29d;Name=1dfd5942-f407-4af5-a7e0-a295a7cad29d +NC_049846.1 . exon 28924 30078 . - . Parent=ac8fd0a0-115e-4343-8420-15fd17fa8406;ID=7b6abf1f-4a08-477b-8061-deb9cd0ecede;Name=7b6abf1f-4a08-477b-8061-deb9cd0ecede +### +NC_049846.1 . terminator 7806 7827 . . . owner=benburrowes@tamu.edu;ID=06991b32-9e41-4e3c-869a-1be0a85b42df;orig_id=terminator_3;date_last_modified=2020-09-25;Name=terminator_3;date_creation=2020-09-25 +### +NC_049846.1 . terminator 8296 8318 . . . owner=benburrowes@tamu.edu;ID=6a49da14-a9db-4da2-b614-b9aa1bb0c90c;orig_id=terminator_7;date_last_modified=2020-09-25;Name=terminator_7;date_creation=2020-09-25 +### +NC_049846.1 . terminator 15779 15799 . . . owner=benburrowes@tamu.edu;ID=ee4a4880-ab4f-43ca-ad46-eff0c3903ca6;orig_id=terminator_16;date_last_modified=2020-09-25;Name=terminator_16;date_creation=2020-09-25 +### +NC_049846.1 . terminator 15393 15415 . . . owner=benburrowes@tamu.edu;ID=78be0f25-47fb-4c3a-9b4b-037f7506f34e;orig_id=terminator_15;date_last_modified=2020-09-25;Name=terminator_15;date_creation=2020-09-25 +### +NC_049846.1 . terminator 18225 18253 . . . owner=benburrowes@tamu.edu;ID=41f56835-6f98-4466-8191-150a3b149c23;orig_id=terminator_19;date_last_modified=2020-09-25;Name=terminator_19;date_creation=2020-09-25 +### +NC_049846.1 . terminator 18936 18955 . . . owner=benburrowes@tamu.edu;ID=4903968a-a6d0-49f1-bb1a-36f96f3ec8cb;orig_id=terminator_21;date_last_modified=2020-09-25;Name=terminator_21;date_creation=2020-09-25 +### +NC_049846.1 . terminator 22422 22446 . . . owner=benburrowes@tamu.edu;ID=47172a5f-4cba-436b-ae74-eb83161e4f8d;orig_id=terminator_26;date_last_modified=2020-09-25;Name=terminator_26;date_creation=2020-09-25 +### +NC_049846.1 . terminator 24572 24601 . . . owner=benburrowes@tamu.edu;ID=7b0bf949-3ced-4274-b7db-576794eacd50;orig_id=terminator_27;date_last_modified=2020-09-25;Name=terminator_27;date_creation=2020-09-25 +### +NC_049846.1 . terminator 28440 28461 . . . owner=benburrowes@tamu.edu;ID=a540bd8e-32cf-464f-a616-7b8912c28799;orig_id=terminator_28;date_last_modified=2020-09-25;Name=terminator_28;date_creation=2020-09-25 +### +NC_049846.1 . terminator 28446 28469 . . . owner=benburrowes@tamu.edu;ID=0b5d5ed2-7875-43b2-a8a1-90aff8acc6ea;orig_id=terminator_29;date_last_modified=2020-09-25;Name=terminator_29;date_creation=2020-09-25 +### +NC_049846.1 . terminator 28490 28509 . . . owner=benburrowes@tamu.edu;ID=203ffe4f-a22d-4bdb-a6d1-5aff3f24d071;orig_id=terminator_31;date_last_modified=2020-09-25;Name=terminator_31;date_creation=2020-09-25 +### +NC_049846.1 . terminator 28494 28521 . . . owner=benburrowes@tamu.edu;ID=078cbc78-de50-49be-96f7-24c15a721158;orig_id=terminator_30;date_last_modified=2020-09-25;Name=terminator_30;date_creation=2020-09-25 +### +NC_049846.1 . terminator 31531 31555 . . . owner=benburrowes@tamu.edu;ID=9b02ad6d-118d-4d57-88ad-9c1ad1ebae3b;orig_id=terminator_33;date_last_modified=2020-09-25;Name=terminator_33;date_creation=2020-09-25 +### +NC_049846.1 . terminator 34271 34306 . . . owner=benburrowes@tamu.edu;ID=fe816a64-f0ab-491c-924c-4f96ad3248b7;orig_id=terminator_36;date_last_modified=2020-09-25;Name=terminator_36;date_creation=2020-09-25 +### +NC_049846.1 . terminator 42539 42569 . . . owner=benburrowes@tamu.edu;ID=9d4c1e44-3f5e-4b9c-8be0-f71c505bf717;orig_id=terminator_44;date_last_modified=2020-09-25;Name=terminator_44;date_creation=2020-09-25 +### +NC_049846.1 . terminator 43744 43765 . . . owner=benburrowes@tamu.edu;ID=743fb65b-4c28-4ff3-b39b-4de65e324304;orig_id=terminator_46;date_last_modified=2020-09-25;Name=terminator_46;date_creation=2020-09-25 diff -r 000000000000 -r 6f4c46313117 cpt_promote_qualifiers/test-data/promote_qualifiers_out.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_promote_qualifiers/test-data/promote_qualifiers_out.gff3 Fri Jun 17 13:05:11 2022 +0000 @@ -0,0 +1,48 @@ +##gff-version 3 +##sequence-region NC_049846.1 1 49045 +NC_049846.1 . gene 630 1182 . + . owner=benburrowes@tamu.edu;ID=8942304a-f4f8-4429-82b1-2b28f07f8b1e;date_last_modified=2020-09-25;Name=Gene_1_Name;date_creation=2020-09-25; +NC_049846.1 . mRNA 630 1182 . + . owner=benburrowes@tamu.edu;Parent=8942304a-f4f8-4429-82b1-2b28f07f8b1e;ID=536ddf54-8e6c-4928-99cb-958fbd796706;orig_id=NC_049846.1.gene_1;date_last_modified=2020-09-25;Name=Gene_1_Name;date_creation=2020-09-25; +NC_049846.1 . CDS 643 1182 . + 0 Parent=536ddf54-8e6c-4928-99cb-958fbd796706;ID=536ddf54-8e6c-4928-99cb-958fbd796706-CDS;Name=536ddf54-8e6c-4928-99cb-958fbd796706-CDS; +NC_049846.1 . exon 643 1182 . + . Parent=536ddf54-8e6c-4928-99cb-958fbd796706;ID=ae9ae711-1428-4f4b-b3ea-e6a808a2e56e;Name=ae9ae711-1428-4f4b-b3ea-e6a808a2e56e; +NC_049846.1 . Shine_Dalgarno_sequence 630 634 . + . Parent=536ddf54-8e6c-4928-99cb-958fbd796706;ID=657855ff-d966-4f69-b695-d9e575c9f8e1;Name=657855ff-d966-4f69-b695-d9e575c9f8e1; +NC_049846.1 . gene 1168 1391 . + . owner=benburrowes@tamu.edu;ID=cba0ed94-36c1-4db5-9ddf-66dd9364e918;date_last_modified=2020-09-25;Name=Gene_2_Name;date_creation=2020-09-25; +NC_049846.1 . mRNA 1168 1391 . + . owner=benburrowes@tamu.edu;Parent=cba0ed94-36c1-4db5-9ddf-66dd9364e918;ID=c146fe26-27b0-4abe-8266-4047c8418fac;orig_id=NC_049846.1.gene_2;date_last_modified=2020-09-25;Name=Gene_2_Name;date_creation=2020-09-25; +NC_049846.1 . CDS 1179 1391 . + 0 Parent=c146fe26-27b0-4abe-8266-4047c8418fac;ID=c146fe26-27b0-4abe-8266-4047c8418fac-CDS;Name=c146fe26-27b0-4abe-8266-4047c8418fac-CDS; +NC_049846.1 . Shine_Dalgarno_sequence 1168 1172 . + . Parent=c146fe26-27b0-4abe-8266-4047c8418fac;ID=46a3ab10-8f23-4720-b2e5-0875dc73a44a;Name=46a3ab10-8f23-4720-b2e5-0875dc73a44a; +NC_049846.1 . exon 1179 1391 . + . Parent=c146fe26-27b0-4abe-8266-4047c8418fac;ID=86d5448a-a20e-4d45-93d3-8b0ecf3dcd9f;Name=86d5448a-a20e-4d45-93d3-8b0ecf3dcd9f; +NC_049846.1 . gene 18540 18935 . + . owner=benburrowes@tamu.edu;ID=007a93d5-5241-4646-9dd3-ec991a73760f;date_last_modified=2020-09-25;Name=Gene_3_Name_1;date_creation=2020-09-25; +NC_049846.1 . mRNA 18768 18935 . + . owner=benburrowes@tamu.edu;Parent=007a93d5-5241-4646-9dd3-ec991a73760f;ID=b05601f7-fd9e-4787-8d06-783d970d1aa0;orig_id=NC_049846.1.orf00047.exon;date_last_modified=2020-09-25;Name=Gene_3_Name_2;date_creation=2020-09-25; +NC_049846.1 . CDS 18768 18935 . + 0 Parent=b05601f7-fd9e-4787-8d06-783d970d1aa0;ID=a292e7ae-f32a-42b3-968b-a45b0c459246;Name=a292e7ae-f32a-42b3-968b-a45b0c459246; +NC_049846.1 . exon 18768 18935 . + . Parent=b05601f7-fd9e-4787-8d06-783d970d1aa0;ID=0b26ee78-40a7-4f45-92b4-1bc4615eab57;Name=0b26ee78-40a7-4f45-92b4-1bc4615eab57; +NC_049846.1 . mRNA 18540 18935 . + . owner=benburrowes@tamu.edu;Parent=007a93d5-5241-4646-9dd3-ec991a73760f;ID=4cb5597d-fdfe-4a51-8e5a-477ffefd1815;orig_id=NC_049846.1.gene_37.exon;date_last_modified=2020-09-25;Name=Gene_3_Name_1;date_creation=2020-09-25; +NC_049846.1 . exon 18540 18935 . + . Parent=4cb5597d-fdfe-4a51-8e5a-477ffefd1815;ID=477d89cd-31bc-4dee-b661-b6f1e98a5ae1;Name=477d89cd-31bc-4dee-b661-b6f1e98a5ae1; +NC_049846.1 . CDS 18540 18935 . + 0 Parent=4cb5597d-fdfe-4a51-8e5a-477ffefd1815;ID=4c25daa0-fd45-464f-87f3-8aaefc3b584d;Name=4c25daa0-fd45-464f-87f3-8aaefc3b584d; +NC_049846.1 . gene 28715 28920 . - . owner=benburrowes@tamu.edu;ID=c81f410e-9108-4274-89b3-c52904a05447;date_last_modified=2020-09-25;Name=Gene_4_Name;date_creation=2020-09-25; +NC_049846.1 . mRNA 28715 28920 . - . owner=benburrowes@tamu.edu;Parent=c81f410e-9108-4274-89b3-c52904a05447;ID=0deef6f0-0bfc-448a-a9f3-be4f50304c8b;orig_id=NC_049846.1.gene_44;date_last_modified=2020-09-25;Name=Gene_4_Name;date_creation=2020-09-25; +NC_049846.1 . Shine_Dalgarno_sequence 28916 28920 . - . Parent=0deef6f0-0bfc-448a-a9f3-be4f50304c8b;ID=7056fcbc-8380-46dc-847a-20b0adfd47bd;Name=7056fcbc-8380-46dc-847a-20b0adfd47bd; +NC_049846.1 . CDS 28715 28909 . - 0 Parent=0deef6f0-0bfc-448a-a9f3-be4f50304c8b;ID=0deef6f0-0bfc-448a-a9f3-be4f50304c8b-CDS;Name=0deef6f0-0bfc-448a-a9f3-be4f50304c8b-CDS; +NC_049846.1 . exon 28715 28909 . - . Parent=0deef6f0-0bfc-448a-a9f3-be4f50304c8b;ID=1172f6fa-ace1-44dd-852d-dda7ac37bd03;Name=1172f6fa-ace1-44dd-852d-dda7ac37bd03; +NC_049846.1 . gene 28924 30078 . - . owner=benburrowes@tamu.edu;ID=8c23e351-f6b4-4142-8874-e4a8f99a0032;date_last_modified=2020-09-25;Name=Gene_5_Name_1;date_creation=2020-09-25; +NC_049846.1 . mRNA 28924 29964 . - . owner=benburrowes@tamu.edu;Parent=8c23e351-f6b4-4142-8874-e4a8f99a0032;ID=3e9f4bdb-22df-431d-b213-7773038332ee;orig_id=CDS.0.3610_0.671668761617;date_last_modified=2020-09-25;Name=Gene_5_Name_2;date_creation=2020-09-25; +NC_049846.1 . Shine_Dalgarno_sequence 29962 29964 . - . Parent=3e9f4bdb-22df-431d-b213-7773038332ee;ID=e1b42b07-9dc3-4465-bb7f-247c8a8ed462;Name=e1b42b07-9dc3-4465-bb7f-247c8a8ed462; +NC_049846.1 . exon 28924 29952 . - . Parent=3e9f4bdb-22df-431d-b213-7773038332ee;ID=73c2c908-9ae1-4bd1-9005-7cdd67c08e20;Name=73c2c908-9ae1-4bd1-9005-7cdd67c08e20; +NC_049846.1 . CDS 28924 29952 . - 0 Parent=3e9f4bdb-22df-431d-b213-7773038332ee;ID=3e9f4bdb-22df-431d-b213-7773038332ee-CDS;Name=3e9f4bdb-22df-431d-b213-7773038332ee-CDS; +NC_049846.1 . mRNA 28924 30078 . - . owner=benburrowes@tamu.edu;Parent=8c23e351-f6b4-4142-8874-e4a8f99a0032;ID=ac8fd0a0-115e-4343-8420-15fd17fa8406;orig_id=NC_049846.1.gene_45.exon;date_last_modified=2020-09-25;Name=Gene_5_Name_1;date_creation=2020-09-25; +NC_049846.1 . CDS 28924 30078 . - 0 Parent=ac8fd0a0-115e-4343-8420-15fd17fa8406;ID=1dfd5942-f407-4af5-a7e0-a295a7cad29d;Name=1dfd5942-f407-4af5-a7e0-a295a7cad29d; +NC_049846.1 . exon 28924 30078 . - . Parent=ac8fd0a0-115e-4343-8420-15fd17fa8406;ID=7b6abf1f-4a08-477b-8061-deb9cd0ecede;Name=7b6abf1f-4a08-477b-8061-deb9cd0ecede; +NC_049846.1 . terminator 7806 7827 . . . owner=benburrowes@tamu.edu;ID=06991b32-9e41-4e3c-869a-1be0a85b42df;orig_id=terminator_3;date_last_modified=2020-09-25;Name=terminator_3;date_creation=2020-09-25; +NC_049846.1 . terminator 8296 8318 . . . owner=benburrowes@tamu.edu;ID=6a49da14-a9db-4da2-b614-b9aa1bb0c90c;orig_id=terminator_7;date_last_modified=2020-09-25;Name=terminator_7;date_creation=2020-09-25; +NC_049846.1 . terminator 15779 15799 . . . owner=benburrowes@tamu.edu;ID=ee4a4880-ab4f-43ca-ad46-eff0c3903ca6;orig_id=terminator_16;date_last_modified=2020-09-25;Name=terminator_16;date_creation=2020-09-25; +NC_049846.1 . terminator 15393 15415 . . . owner=benburrowes@tamu.edu;ID=78be0f25-47fb-4c3a-9b4b-037f7506f34e;orig_id=terminator_15;date_last_modified=2020-09-25;Name=terminator_15;date_creation=2020-09-25; +NC_049846.1 . terminator 18225 18253 . . . owner=benburrowes@tamu.edu;ID=41f56835-6f98-4466-8191-150a3b149c23;orig_id=terminator_19;date_last_modified=2020-09-25;Name=terminator_19;date_creation=2020-09-25; +NC_049846.1 . terminator 18936 18955 . . . owner=benburrowes@tamu.edu;ID=4903968a-a6d0-49f1-bb1a-36f96f3ec8cb;orig_id=terminator_21;date_last_modified=2020-09-25;Name=terminator_21;date_creation=2020-09-25; +NC_049846.1 . terminator 22422 22446 . . . owner=benburrowes@tamu.edu;ID=47172a5f-4cba-436b-ae74-eb83161e4f8d;orig_id=terminator_26;date_last_modified=2020-09-25;Name=terminator_26;date_creation=2020-09-25; +NC_049846.1 . terminator 24572 24601 . . . owner=benburrowes@tamu.edu;ID=7b0bf949-3ced-4274-b7db-576794eacd50;orig_id=terminator_27;date_last_modified=2020-09-25;Name=terminator_27;date_creation=2020-09-25; +NC_049846.1 . terminator 28440 28461 . . . owner=benburrowes@tamu.edu;ID=a540bd8e-32cf-464f-a616-7b8912c28799;orig_id=terminator_28;date_last_modified=2020-09-25;Name=terminator_28;date_creation=2020-09-25; +NC_049846.1 . terminator 28446 28469 . . . owner=benburrowes@tamu.edu;ID=0b5d5ed2-7875-43b2-a8a1-90aff8acc6ea;orig_id=terminator_29;date_last_modified=2020-09-25;Name=terminator_29;date_creation=2020-09-25; +NC_049846.1 . terminator 28490 28509 . . . owner=benburrowes@tamu.edu;ID=203ffe4f-a22d-4bdb-a6d1-5aff3f24d071;orig_id=terminator_31;date_last_modified=2020-09-25;Name=terminator_31;date_creation=2020-09-25; +NC_049846.1 . terminator 28494 28521 . . . owner=benburrowes@tamu.edu;ID=078cbc78-de50-49be-96f7-24c15a721158;orig_id=terminator_30;date_last_modified=2020-09-25;Name=terminator_30;date_creation=2020-09-25; +NC_049846.1 . terminator 31531 31555 . . . owner=benburrowes@tamu.edu;ID=9b02ad6d-118d-4d57-88ad-9c1ad1ebae3b;orig_id=terminator_33;date_last_modified=2020-09-25;Name=terminator_33;date_creation=2020-09-25; +NC_049846.1 . terminator 34271 34306 . . . owner=benburrowes@tamu.edu;ID=fe816a64-f0ab-491c-924c-4f96ad3248b7;orig_id=terminator_36;date_last_modified=2020-09-25;Name=terminator_36;date_creation=2020-09-25; +NC_049846.1 . terminator 42539 42569 . . . owner=benburrowes@tamu.edu;ID=9d4c1e44-3f5e-4b9c-8be0-f71c505bf717;orig_id=terminator_44;date_last_modified=2020-09-25;Name=terminator_44;date_creation=2020-09-25; +NC_049846.1 . terminator 43744 43765 . . . owner=benburrowes@tamu.edu;ID=743fb65b-4c28-4ff3-b39b-4de65e324304;orig_id=terminator_46;date_last_modified=2020-09-25;Name=terminator_46;date_creation=2020-09-25;