changeset 0:6e7e20cb1fc7 draft

Uploaded
author cpt
date Fri, 17 Jun 2022 04:00:49 +0000
parents
children 4f4b413056f6
files cpt_gff_rebase/cpt-macros.xml cpt_gff_rebase/gff3.py cpt_gff_rebase/gff3_rebase.py cpt_gff_rebase/gff3_rebase.xml cpt_gff_rebase/macros.xml cpt_gff_rebase/test-data/T7_CLEAN.gff3 cpt_gff_rebase/test-data/T7_TMHMM.gff3 cpt_gff_rebase/test-data/T7_TMHMM_REBASE.gff3 cpt_gff_rebase/test-data/child.gff cpt_gff_rebase/test-data/nonprotein.gff cpt_gff_rebase/test-data/parent.gff cpt_gff_rebase/test-data/proteins.gff
diffstat 12 files changed, 1031 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_gff_rebase/cpt-macros.xml	Fri Jun 17 04:00:49 2022 +0000
@@ -0,0 +1,115 @@
+<?xml version="1.0"?>
+<macros>
+	<xml name="gff_requirements">
+		<requirements>
+			<requirement type="package" version="2.7">python</requirement>
+			<requirement type="package" version="1.65">biopython</requirement>
+			<requirement type="package" version="2.12.1">requests</requirement>
+			<yield/>
+		</requirements>
+		<version_command>
+		<![CDATA[
+			cd $__tool_directory__ && git rev-parse HEAD
+		]]>
+		</version_command>
+	</xml>
+	<xml name="citation/mijalisrasche">
+		<citation type="doi">10.1371/journal.pcbi.1008214</citation>
+		<citation type="bibtex">@unpublished{galaxyTools,
+		author = {E. Mijalis, H. Rasche},
+		title = {CPT Galaxy Tools},
+		year = {2013-2017},
+		note = {https://github.com/tamu-cpt/galaxy-tools/}
+		}
+		</citation>
+	</xml>
+	<xml name="citations">
+		<citations>
+			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
+			<citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {E. Mijalis, H. Rasche},
+				title = {CPT Galaxy Tools},
+				year = {2013-2017},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation> 
+		<yield/>
+		</citations>
+	</xml>
+    	<xml name="citations-crr">
+		<citations>
+			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
+			<citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Ross},
+				title = {CPT Galaxy Tools},
+				year = {2020-},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+		<yield/>
+		</citations>
+	</xml>
+        <xml name="citations-2020">
+		<citations>
+			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
+			<citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {E. Mijalis, H. Rasche},
+				title = {CPT Galaxy Tools},
+				year = {2013-2017},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+                        <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {A. Criscione},
+				title = {CPT Galaxy Tools},
+				year = {2019-2021},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+                        </citation>
+                        <yield/>
+		</citations>
+	</xml>
+        <xml name="citations-2020-AJC-solo">
+		<citations>
+			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
+                        <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {A. Criscione},
+				title = {CPT Galaxy Tools},
+				year = {2019-2021},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+                        </citation>
+                        <yield/>
+		</citations>
+	</xml>
+        <xml name="citations-clm">
+		<citations>
+			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
+			<citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Maughmer},
+				title = {CPT Galaxy Tools},
+				year = {2017-2020},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+                        <yield/>
+		</citations>
+	</xml>
+        <xml name="sl-citations-clm">
+			<citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Maughmer},
+				title = {CPT Galaxy Tools},
+				year = {2017-2020},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+                        <yield/>
+	</xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_gff_rebase/gff3.py	Fri Jun 17 04:00:49 2022 +0000
@@ -0,0 +1,346 @@
+import copy
+import logging
+
+# getLogger() with no name returns the root logger, so the WARN threshold set
+# here also affects every logger that inherits its level from the root.
+log = logging.getLogger()
+log.setLevel(logging.WARN)
+
+
+def feature_lambda(
+    feature_list,
+    test,
+    test_kwargs,
+    subfeatures=True,
+    parent=None,
+    invert=False,
+    recurse=True,
+):
+    """Recursively search through features, testing each with a test function, yielding matches.
+
+    GFF3 is a hierarchical data structure, so we need to be able to recursively
+    search through features. E.g. if you're looking for a feature with
+    ID='bob.42', you can't just do a simple list comprehension with a test
+    case. You don't know how deeply buried bob.42 will be in the feature
+    tree. This is where feature_lambda steps in.
+
+    As a side effect, every visited feature gets a ``_parent`` attribute:
+    the enclosing feature for sub-features, or the feature itself for
+    top-level features.
+
+    :type feature_list: list
+    :param feature_list: an iterable of features
+
+    :type test: function reference
+    :param test: a closure with the method signature (feature, **kwargs) where
+                 the kwargs are those passed in the next argument. Its result
+                 is interpreted by truthiness: truthy if the feature is to be
+                 yielded, falsy if it is to be ignored. This function CAN
+                 mutate the features passed to it (think "apply").
+
+    :type test_kwargs: dictionary
+    :param test_kwargs: kwargs to pass to your closure when it is called.
+
+    :type subfeatures: boolean
+    :param subfeatures: when True, a matching feature is yielded as-is, with
+                        its sub_features tree still attached. When False, a
+                        deep copy whose ``sub_features`` list has been emptied
+                        is yielded instead, so the caller receives only the
+                        matching feature itself.
+
+    :type parent: feature
+    :param parent: the feature whose sub_features are being walked. Used by
+                   the recursion; leave as None for a top-level call.
+
+    :type invert: boolean
+    :param invert: Negate/invert the result of the filter.
+
+    :type recurse: boolean
+    :param recurse: when False, only the members of feature_list are tested
+                    and their sub_features are not descended into.
+
+    :rtype: generator
+    :return: Yields matching features one at a time, parents before children.
+    """
+    # Either the top level set of [features] or the subfeature attribute
+    for feature in feature_list:
+        feature._parent = parent
+        if not parent:
+            # Set to self so we cannot go above root.
+            feature._parent = feature
+
+        # Compare truthiness instead of XOR-ing the raw values: a bitwise XOR
+        # mis-handles truthy non-bool results (True ^ 2 == 3) and raises on
+        # None, whereas tests are only required to answer "yes" or "no".
+        matched = bool(test(feature, **test_kwargs))
+        if matched != bool(invert):
+            if subfeatures:
+                yield feature
+            else:
+                feature_copy = copy.deepcopy(feature)
+                feature_copy.sub_features = list()
+                yield feature_copy
+
+        if recurse and hasattr(feature, "sub_features"):
+            for match in feature_lambda(
+                feature.sub_features,
+                test,
+                test_kwargs,
+                subfeatures=subfeatures,
+                parent=feature,
+                invert=invert,
+                recurse=recurse,
+            ):
+                yield match
+
+
+def fetchParent(feature):
+    """Return the top-most ancestor of ``feature``.
+
+    Follows the ``_parent`` links that feature_lambda attaches. The walk
+    stops at a feature that has no ``_parent``, whose ``_parent`` is None,
+    or whose ``_parent`` is the feature itself (which is how feature_lambda
+    marks a top-level feature).
+
+    :param feature: the feature to start from
+    :return: the root feature; ``feature`` itself if it has no ancestry
+    """
+    current = feature
+    while True:
+        ancestor = getattr(current, "_parent", None)
+        # A self-referencing _parent marks the root; following it would
+        # never terminate.
+        if ancestor is None or ancestor is current:
+            return current
+        current = ancestor
+
+
+def feature_test_true(feature, **kwargs):
+    """Accept every feature, whatever keyword arguments are supplied."""
+    return True
+
+
+def feature_test_type(feature, **kwargs):
+    """Case-insensitively compare ``feature.type`` with the requested type(s).
+
+    Pass ``type`` for a single name or ``types`` for an iterable of names;
+    ``type`` wins when both are given.
+
+    :return: True if the feature's type equals one of the requested names
+    :raises Exception: if neither ``type`` nor ``types`` is supplied
+    """
+    if "type" in kwargs:
+        wanted = [kwargs["type"]]
+    elif "types" in kwargs:
+        wanted = kwargs["types"]
+    else:
+        raise Exception("Incorrect feature_test_type call, need type or types")
+    return any(str(feature.type).upper() == str(name).upper() for name in wanted)
+
+
+def feature_test_qual_value(feature, **kwargs):
+    """Test qualifier values.
+
+    Returns True when at least one value stored under
+    feature.qualifiers[kwargs['qualifier']] is in kwargs['attribute_list'].
+    kwargs['qualifier'] may be a single key or a list of keys; with a list,
+    a hit under any of the keys is enough.
+    """
+    requested = kwargs["qualifier"]
+    qualifier_names = requested if isinstance(requested, list) else [requested]
+    for qualifier_name in qualifier_names:
+        for candidate in feature.qualifiers.get(qualifier_name, []):
+            if candidate in kwargs["attribute_list"]:
+                return True
+    return False
+
+
+def feature_test_location(feature, **kwargs):
+    """Test whether position ``loc`` lies within the feature, ends included.
+
+    If ``strand`` is supplied, the feature must also be on that strand.
+    """
+    location = feature.location
+    wrong_strand = "strand" in kwargs and location.strand != kwargs["strand"]
+    if wrong_strand:
+        return False
+    return location.start <= kwargs["loc"] <= location.end
+
+
+def feature_test_quals(feature, **kwargs):
+    """Check that a feature satisfies every requested qualifier condition.
+
+    Each keyword names a qualifier key. A value of None only requires the
+    key to be present; otherwise the value is an iterable of strings, and at
+    least one of them must occur as a substring of at least one of the
+    feature's values for that key. All keywords must be satisfied.
+
+    Example::
+
+        a = Feature(qualifiers={'Note': ['Some notes', 'Aasdf']})
+
+        # Check if a contains a Note
+        feature_test_quals(a, **{'Note': None})  # Returns True
+        feature_test_quals(a, **{'Product': None})  # Returns False
+
+        # Check if a contains a note with specific value
+        feature_test_quals(a, **{'Note': ['ome']})  # Returns True
+
+        # Check if a contains a note with specific value
+        feature_test_quals(a, **{'Note': ['other']})  # Returns False
+
+    :return: True if every condition holds (also when none are given)
+    """
+    for key, wanted in kwargs.items():
+        if key not in feature.qualifiers:
+            return False
+
+        # Key is present and no value was requested: this condition is met,
+        # but the remaining keys still have to be checked.
+        if wanted is None:
+            continue
+
+        # Otherwise at least one requested fragment must appear in at least
+        # one of the qualifier's values.
+        values = feature.qualifiers[key]
+        if not any(fragment in value for value in values for fragment in wanted):
+            return False
+
+    return True
+
+
+def feature_test_contains(feature, **kwargs):
+    """Test whether a point or a range lies strictly inside the feature.
+
+    Pass ``index`` for a single position, or ``range`` as a mapping with
+    ``start`` and ``end`` keys; ``index`` wins when both are given. The
+    feature's own start and end positions do not count as inside.
+
+    :raises RuntimeError: if neither ``index`` nor ``range`` is supplied
+    """
+    if "index" not in kwargs and "range" not in kwargs:
+        raise RuntimeError("Must use index or range keyword")
+
+    location = feature.location
+    if "index" in kwargs:
+        return location.start < kwargs["index"] < location.end
+
+    bounds = kwargs["range"]
+    starts_inside = location.start < bounds["start"] < location.end
+    return starts_inside and location.start < bounds["end"] < location.end
+
+
+def get_id(feature=None, parent_prefix=None):
+    """Build a display identifier for a feature from its qualifiers.
+
+    The first value of the first qualifier found, in the order locus_tag,
+    gene, Gene, product, Product, Name, is used. When ``parent_prefix`` is
+    given it is prepended as ``"<prefix>|"``.
+
+    :return: the identifier; if none of the qualifiers is present, the bare
+             ``feature.id`` is returned (without the prefix)
+    """
+    prefix = "" if parent_prefix is None else parent_prefix + "|"
+    for key in ("locus_tag", "gene", "Gene", "product", "Product", "Name"):
+        if key in feature.qualifiers:
+            return prefix + feature.qualifiers[key][0]
+    # No naming qualifier available: fall back to the feature's own id.
+    return feature.id
+
+
+def get_gff3_id(gene):
+    """Return the first ``Name`` qualifier of ``gene``, or ``gene.id`` if it has none."""
+    fallback = [gene.id]
+    names = gene.qualifiers.get("Name", fallback)
+    return names[0]
+
+
+def ensure_location_in_bounds(start=0, end=0, parent_length=0):
+    """Shift ``start`` and ``end`` towards the range [0, parent_length].
+
+    Each coordinate is moved in steps of three (one codon) so that the
+    reading frame is preserved: first upwards while it is negative, then
+    downwards while it exceeds ``parent_length``.
+
+    :return: tuple ``(start, end)`` of the adjusted coordinates
+    """
+
+    def shift_in_frame(position):
+        # Stepping by whole codons prevents frameshift errors.
+        while position < 0:
+            position += 3
+        while position > parent_length:
+            position -= 3
+        return position
+
+    return (shift_in_frame(start), shift_in_frame(end))
+
+
+def coding_genes(feature_list):
+    """Yield the gene features that have at least one CDS among their sub-features."""
+    for gene in genes(feature_list):
+        cds_matches = list(
+            feature_lambda(
+                gene.sub_features,
+                feature_test_type,
+                {"type": "CDS"},
+                subfeatures=False,
+            )
+        )
+        if cds_matches:
+            yield gene
+
+
+def genes(feature_list, feature_type="gene", sort=False):
+    """
+    Simple filter to extract features of one type (genes by default) from
+    the feature set, searching the whole feature tree.
+
+    With sort=True all matches are collected first and then yielded in
+    ascending order of their start position.
+    """
+    matches = feature_lambda(
+        feature_list, feature_test_type, {"type": feature_type}, subfeatures=True
+    )
+    if sort:
+        matches = sorted(matches, key=lambda feature: feature.location.start)
+    for match in matches:
+        yield match
+
+
+def wa_unified_product_name(feature):
+    """
+    Try and figure out a name. We gave conflicting instructions, so
+    this isn't as trivial as it should be. Sometimes it will be in
+    'product' or 'Product', other times in 'Name'.
+
+    Returns the first 'product' value, else the first 'Product' value, else
+    the first 'Name' value unless that looks like a UUID, else None.
+    """
+    qualifiers = feature.qualifiers
+
+    # Manually applied tags take precedence.
+    capitalised = qualifiers.get("Product", [None])
+    protein_product = qualifiers.get("product", capitalised)[0]
+    if protein_product is not None:
+        return protein_product
+
+    # Neither tag is available; settle for a Name that is not a UUID.
+    if "Name" in qualifiers:
+        candidate = qualifiers["Name"][0]
+        if not is_uuid(candidate):
+            return candidate
+
+    return protein_product
+
+
+def is_uuid(name):
+    """Heuristic UUID check: exactly four hyphens and a total length of 36."""
+    if name.count("-") != 4:
+        return False
+    return len(name) == 36
+
+
+def get_rbs_from(gene):
+    """Collect the sub-features of ``gene`` that look like a ribosome binding site.
+
+    Four annotation styles are recognised: ``RBS`` features,
+    ``Shine_Dalgarno_sequence`` features, ``regulatory`` features whose
+    ``regulatory_class`` qualifier contains ``ribosome_binding_site``, and
+    ``exon`` features shorter than 10.
+
+    :return: list of matching features (copies without their sub_features),
+             ordered RBS, Shine-Dalgarno, regulatory, short exons
+    """
+
+    def sub_features_of_type(type_name):
+        return list(
+            feature_lambda(
+                gene.sub_features,
+                feature_test_type,
+                {"type": type_name},
+                subfeatures=False,
+            )
+        )
+
+    # Normal RBS annotation types
+    rbs_rbs = sub_features_of_type("RBS")
+    rbs_sds = sub_features_of_type("Shine_Dalgarno_sequence")
+    # Apollo-style annotation: presumably the RBS is modelled as a very
+    # short exon, so only exons shorter than 10 are kept.
+    apollo_exons = [exon for exon in sub_features_of_type("exon") if len(exon) < 10]
+    # These are more NCBI's style
+    regulatory_elements = sub_features_of_type("regulatory")
+    rbs_regulatory = list(
+        feature_lambda(
+            regulatory_elements,
+            feature_test_quals,
+            {"regulatory_class": ["ribosome_binding_site"]},
+            subfeatures=False,
+        )
+    )
+    # Ideally exactly one of these lists is non-empty.
+    return rbs_rbs + rbs_sds + rbs_regulatory + apollo_exons
+
+
+def nice_name(record):
+    """
+    Get the real name rather than NCBI IDs and so on: if the record holds
+    exactly one ``contig`` feature, return the first value of its
+    ``organism`` qualifier. In every other case, return record.id.
+    """
+    fallback = record.id
+    contigs = list(genes(record.features, feature_type="contig"))
+    if len(contigs) != 1:
+        return fallback
+    return contigs[0].qualifiers.get("organism", [fallback])[0]
+
+
+def fsort(it):
+    """Yield the features of ``it`` in ascending order of integer start position."""
+
+    def start_of(feature):
+        return int(feature.location.start)
+
+    ordered = sorted(it, key=start_of)
+    for feature in ordered:
+        yield feature
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_gff_rebase/gff3_rebase.py	Fri Jun 17 04:00:49 2022 +0000
@@ -0,0 +1,131 @@
+#!/usr/bin/env python
+import sys
+import logging
+import argparse
+from gff3 import feature_lambda, feature_test_qual_value
+from CPT_GFFParser import gffParse, gffWrite
+from Bio.SeqFeature import FeatureLocation
+
+# Module logger; basicConfig enables INFO so the per-record "Parsing ..."
+# messages emitted while reading the child file are shown.
+log = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+
+def __get_features(child, interpro=False):
+    """Group the top-level features of the child GFF3 by the id of their record.
+
+    The record id is used as the key because that is what the features are
+    matched against during remapping.
+
+    :param child: child GFF3 input, passed straight to gffParse
+    :param interpro: when true, ``polypeptide`` features are left out
+    :return: dict mapping record id to a list of that record's features
+    """
+    features_by_record = {}
+    for rec in gffParse(child):
+        log.info("Parsing %s", rec.id)
+        # Only top level
+        for feature in rec.features:
+            # In an interpro specific gff3 file the polypeptide features
+            # are of no use, so they are dropped.
+            if interpro and feature.type == "polypeptide":
+                continue
+            features_by_record.setdefault(rec.id, []).append(feature)
+    return features_by_record
+
+
+def __update_feature_location(feature, parent, protein2dna):
+    """Move ``feature`` (and its sub-features) into the coordinates of ``parent``.
+
+    The feature's start/end are treated as offsets into the parent: measured
+    from the parent's start when the parent strand is >= 0, and from the
+    parent's end otherwise. The feature's location is replaced in place and
+    takes the parent's orientation (+1 or -1).
+
+    :param feature: feature to relocate; modified in place
+    :param parent: feature supplying the reference location and strand
+    :param protein2dna: when true, the offsets are multiplied by three first
+    """
+    offset_start, offset_end = feature.location.start, feature.location.end
+    if protein2dna:
+        offset_start, offset_end = offset_start * 3, offset_end * 3
+
+    anchor = parent.location
+    if anchor.strand >= 0:
+        new_start = anchor.start + offset_start
+        new_end = anchor.start + offset_end
+        new_strand = +1
+    else:
+        # Offsets count backwards from the parent's end on the reverse strand.
+        new_start = anchor.end - offset_end
+        new_end = anchor.end - offset_start
+        new_strand = -1
+
+    # Don't let start/stops be less than zero.
+    #
+    # Instead, replace them with their value modulo 3 to try and keep the
+    # feature in the reading frame that it should be in.
+    if new_start < 0:
+        new_start %= 3
+    if new_end < 0:
+        new_end %= 3
+
+    feature.location = FeatureLocation(new_start, new_end, strand=new_strand)
+
+    # Sub-features are rebased against the same parent, not against the
+    # feature that contains them.
+    for subfeature in getattr(feature, "sub_features", ()):
+        __update_feature_location(subfeature, parent, protein2dna)
+
+
+def rebase(parent, child, interpro=False, protein2dna=False, map_by="ID"):
+    """Rebase child features onto parent coordinates and write GFF3 to stdout.
+
+    Child features are grouped by the id of the record they sit on. Every
+    parent feature whose ``map_by`` qualifier holds one of those ids has the
+    corresponding child features relocated against it. Each parent record
+    is then written out containing only the relocated child features.
+
+    :param parent: parent GFF3 input, passed to gffParse
+    :param child: child GFF3 input, passed to gffParse
+    :param interpro: drop ``polypeptide`` features and remove the ``status``
+                     and ``Target`` qualifiers from the output
+    :param protein2dna: multiply child coordinates by three before relocating
+    :param map_by: qualifier key (or list of keys) on the parent features
+                   whose values are matched against the child record ids
+    """
+    # get all of the features we will be re-mapping in a dictionary, keyed by parent feature ID
+    child_features = __get_features(child, interpro=interpro)
+    map_keys = map_by if isinstance(map_by, list) else [map_by]
+
+    for rec in gffParse(parent):
+        replacement_features = []
+        # Horrifically slow I believe
+        for feature in feature_lambda(
+            rec.features,
+            # Filter features in the parent genome by those that are
+            # "interesting", i.e. have results in child_features array.
+            # Probably an unnecessary optimisation.
+            feature_test_qual_value,
+            {"qualifier": map_by, "attribute_list": child_features.keys()},
+            subfeatures=False,
+        ):
+            # Look the children up through the same qualifier values that
+            # made this feature match. Using feature.id here would raise
+            # KeyError whenever map_by is a qualifier other than the ID.
+            matched_keys = []
+            for key in map_keys:
+                for value in feature.qualifiers.get(key, []):
+                    if value in child_features and value not in matched_keys:
+                        matched_keys.append(value)
+
+            for matched_key in matched_keys:
+                # NOTE(review): the child features are modified in place, so
+                # a key matched by two parent features is relocated twice.
+                for x in child_features[matched_key]:
+                    # Update the location of the actual feature
+                    __update_feature_location(x, feature, protein2dna)
+
+                    if interpro:
+                        for y in ("status", "Target"):
+                            # A missing qualifier is fine; nothing else is
+                            # silently ignored.
+                            x.qualifiers.pop(y, None)
+
+                    replacement_features.append(x)
+        # We do this so we don't include the original set of features that we
+        # were rebasing against in our result.
+        rec.features = replacement_features
+        rec.annotations = {}
+        gffWrite([rec], sys.stdout)
+
+
+if __name__ == "__main__":
+    # Command-line entry point: parse the arguments and hand them to rebase().
+    parser = argparse.ArgumentParser(
+        description="rebase gff3 features against parent locations", epilog=""
+    )
+    parser.add_argument(
+        "parent", type=argparse.FileType("r"), help="Parent GFF3 annotations"
+    )
+    parser.add_argument(
+        "child",
+        type=argparse.FileType("r"),
+        help="Child GFF3 annotations to rebase against parent",
+    )
+    parser.add_argument(
+        "--interpro", action="store_true", help="Interpro specific modifications"
+    )
+    parser.add_argument(
+        "--protein2dna",
+        action="store_true",
+        help="Map protein translated results to original DNA data",
+    )
+    parser.add_argument("--map_by", help="Map by key", default="ID")
+    args = parser.parse_args()
+    # The argument names (parent, child, interpro, protein2dna, map_by) are
+    # exactly rebase()'s parameter names, so the namespace is passed through
+    # as keyword arguments.
+    rebase(**vars(args))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_gff_rebase/gff3_rebase.xml	Fri Jun 17 04:00:49 2022 +0000
@@ -0,0 +1,102 @@
+<tool id="gff3.rebase" name="Rebase GFF3 features" version="19.1.0.0">
+  <description>against parent features</description>
+  <macros>
+    <import>macros.xml</import>
+		<import>cpt-macros.xml</import>
+  </macros>
+  <expand macro="requirements"/>
+  <command interpreter="python" detect_errors="aggressive"><![CDATA[gff3_rebase.py
+$parent
+$child
+
+$interpro
+$protein2dna
+--map_by "$map_by"
+> $default]]></command>
+  <inputs>
+    <param label="Parent GFF3 annotations"                         name="parent" format="gff3" type="data"/>
+    <param label="Child GFF3 annotations to rebase against parent" name="child"  format="gff3" type="data"/>
+
+    <param label="Interpro specific modifications" name="interpro" type="boolean" truevalue="--interpro" falsevalue=""/>
+    <param label="Map protein translated results to original DNA data" name="protein2dna" type="boolean" truevalue="--protein2dna" falsevalue=""/>
+
+    <param label="Mapping Key" name="map_by" type="text" value="ID" />
+  </inputs>
+  <outputs>
+    <data format="gff3" name="default"/>
+  </outputs>
+  <tests>
+      <test>
+          <param name="parent" value="T7_CLEAN.gff3"/>
+          <param name="child" value="T7_TMHMM.gff3"/>
+          <param name="interpro" value="" />
+          <param name="protein2dna" value="--protein2dna" />
+          <param name="map_by" value="ID" />
+          <output name="default" file="T7_TMHMM_REBASE.gff3"/>
+      </test>
+      <test>
+          <param name="parent" value="parent.gff"/>
+          <param name="child" value="child.gff"/>
+	  <param name="interpro" value="" />
+          <param name="protein2dna" value="--protein2dna" />
+	  <param name="map_by" value="ID" />
+          <output name="default" file="proteins.gff"/>
+      </test>
+      <test>
+          <param name="parent" value="parent.gff"/>
+          <param name="child" value="child.gff"/>
+          <param name="interpro" value="" />
+          <param name="protein2dna" value="" />
+	  <param name="map_by" value="ID" />
+	  <output name="default" file="nonprotein.gff"/>
+      </test>
+  </tests>
+  <help><![CDATA[
+**What it does**
+
+The workflow in a genomic data analysis typically follows a process of feature 
+export, analysis and then mapping the results of the analysis back to the genome.
+
+For meaningful display in JBrowse, it is necessary to accurately map 
+analysis results back to their corresponding positions in the context of the entire 
+genome.
+
+This tool fills that gap, by *rebasing* (calculating parent genome coordinates) 
+features from analysis results against the parent features which 
+were originally used for the analysis.
+
+**Example Input/Output**
+
+For a *parent* set of annotations::
+
+	#gff-version 3
+	PhageBob    maker   cds     300     600     .       +       .       ID=cds42
+
+Where the analysis had exported the CDS (child) FASTA sequence:: 
+
+	>cds42
+	MRTNASC
+
+Then analyzed that feature, producing the *child* annotation file::
+
+	#gff-version 3
+	cds42       blastp  match_part      1       50      1e-40   .       .       ID=m00001;Notes=RNAse A Protein
+
+This tool will then localize the results properly against the parent and permit 
+proper visualization of the results in the correct location::
+
+	#gff-version 3
+	PhageBob    blastp  match_part      300     449     1e-40   +       .       ID=m00001;Notes=RNAse A Protein
+
+**Options**
+
+The **Interpro specific modifications** option selectively ignores *features* (*i.e.* polypeptide) and 
+qualifiers (status, Target) not needed in the output. 
+
+The **Map protein translated results to original DNA data** option indicates that the DNA sequences were translated into 
+protein sequences during the genomic export process. When this option is selected, 
+the tool will multiply the child (protein) coordinates by three to obtain the correct DNA locations.
+
+]]></help>
+		<expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_gff_rebase/macros.xml	Fri Jun 17 04:00:49 2022 +0000
@@ -0,0 +1,85 @@
+<?xml version="1.0"?>
+<macros>
+	<xml name="requirements">
+		<requirements>
+			<requirement type="package" version="3.8.13">python</requirement>
+			<requirement type="package" version="1.79">biopython</requirement>
+			<requirement type="package" version="1.2.2">cpt_gffparser</requirement>  
+			<yield/>
+		</requirements>
+	</xml>
+	<token name="@BLAST_TSV@">
+		"$blast_tsv"
+	</token>
+	<xml name="blast_tsv">
+		<param label="Blast Results" help="TSV/tabular (25 Column)"
+			name="blast_tsv" type="data" format="tabular" />
+	</xml>
+
+	<token name="@BLAST_XML@">
+		"$blast_xml"
+	</token>
+	<xml name="blast_xml">
+		<param label="Blast Results" help="XML format"
+			name="blast_xml" type="data" format="blastxml" />
+	</xml>
+	<xml name="gff3_with_fasta">
+	<param label="Genome Sequences" name="fasta" type="data" format="fasta" />
+	<param label="Genome Annotations" name="gff3" type="data" format="gff3" />
+	</xml>
+	<xml name="genome_selector">
+		<conditional name="reference_genome">
+			<param name="reference_genome_source" type="select" label="Reference Genome">
+				<option value="history" selected="True">From History</option>
+				<option value="cached">Locally Cached</option>
+			</param>
+			<when value="cached">
+				<param name="fasta_indexes" type="select" label="Source FASTA Sequence">
+					<options from_data_table="all_fasta"/>
+				</param>
+			</when>
+			<when value="history">
+				<param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
+			</when>
+		</conditional>
+	</xml>
+	<xml name="gff3_input">
+		<param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
+	</xml>
+	<xml name="input/gff3+fasta">
+		<expand macro="gff3_input" />
+		<expand macro="genome_selector" />
+	</xml>
+	<token name="@INPUT_GFF@">
+	"$gff3_data"
+	</token>
+	<token name="@INPUT_FASTA@">
+#if str($reference_genome.reference_genome_source) == 'cached':
+		"${reference_genome.fasta_indexes.fields.path}"
+#else if str($reference_genome.reference_genome_source) == 'history':
+		genomeref.fa
+#end if
+	</token>
+	<token name="@GENOME_SELECTOR_PRE@">
+#if $reference_genome.reference_genome_source == 'history':
+		ln -s $reference_genome.genome_fasta genomeref.fa;
+#end if
+	</token>
+	<token name="@GENOME_SELECTOR@">
+#if str($reference_genome.reference_genome_source) == 'cached':
+		"${reference_genome.fasta_indexes.fields.path}"
+#else if str($reference_genome.reference_genome_source) == 'history':
+		genomeref.fa
+#end if
+	</token>
+        <xml name="input/fasta">
+		<param label="Fasta file" name="sequences" type="data" format="fasta"/>
+	</xml>
+
+	<token name="@SEQUENCE@">
+		"$sequences"
+	</token>
+	<xml name="input/fasta/protein">
+		<param label="Protein fasta file" name="sequences" type="data" format="fasta"/>
+	</xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_gff_rebase/test-data/T7_CLEAN.gff3	Fri Jun 17 04:00:49 2022 +0000
@@ -0,0 +1,171 @@
+##gff-version 3
+NC_001604	GenBank	contig	1	39937	.	+	1	ID=NC_001604;Dbxref=BioProject:PRJNA485481,taxon:10760;Name=NC_001604;Note=Enterobacteria phage T7%2C complete genome.,VALIDATED REFSEQ: This record has undergone validation or preliminary review. The reference sequence was derived from V01146. The sequence was submitted by the authors [1] on magnetic tape and revised according to [3],[4],and [5]. [3] made changes at 8 positions in gene 1 without affecting the size of the total sequence but changing gene 1 amino acids 443,474,and 388 to 424. [4] inserted a T at nucleotide 17511,increasing the total sequence to 39937 bp. This change,originally found in T3 DNA [8],revealed gene 5.9 and shortened gene 6. [5] changed the nucleotides at 11061 and 11062 from GT to TG,changing amino acid 119 of T7 lysozyme (gene 3.5) from glycine to valine. Features have been extracted from [1] unless otherwise noted. The sequence shown is that of the l strand,which corresponds to the sequence of all mRNAs of known functional significance. Early mRNAs are produced by three major promoters for E. coli RNA polymerase A1,A2,and A3,located near the left end of the DNA. A fourth major E. coli promoter,A0 (also called D),that would direct transcription leftward,and several minor E. coli promoters (see Table 6 in [1]) function in vitro but have no known in vivo function. Late mRNAs are produced by 15 promoters for T7 RNA polymerase distributed across the right-most 85%25 of the DNA,and named e.g. phi10,for the first gene downstream of the promoter. There are also two T7 promoters,phiOL and phiOR,associated with possible origins of replication at the left and right ends of T7 DNA. The 23 base-pair consensus sequence for T7 promoters stretches from -17 to +6,where the initiating nucleotide is at +1. T7 DNA also contains a 160 base-pair terminal repetition. The beginning and end of RNAs are determined by the promoters,by a terminator for E. 
coli RNA polymerase,TE,located at the end of the early region,a terminator for T7 RNA polymerase,Tphi,located just downstream of gene 10,and a series of RNase III cleavage sites. Early mRNAs made by E. coli RNA polymerase are listed in Features. The many RNAs predicted to be made by T7 RNA polymerase are not listed but can be deduced from the position of the transcription signals (see Tables 8 and 9 in [1]). Promoters are listed in Features by the known or predicted first nucleotide of the RNA,terminators by the last nucleotide of the RNA,and RNase III sites by the nucleotide 5' of the position of cleavage. Genes are numbered 0.3 to 19.5 in order of their left-to-right position on the genome. Proteins are named by the gene number,e.g.,the gene 1 protein,or by a functional name,e.g.,T7 RNA polymerase. There is now genetic or biochemical evidence that proteins are produced from at least 52 of the 56 T7 genes. Gene 4 produces two proteins,4A and 4B,by initiating translation at two different sites in the same reading frame. Gene 10 produces two proteins,10A and 10B,by frameshifting during translation. Genes 0.6 and 5.5 probably also make two proteins by translational frameshifting,the gene 5.5 frameshift producing a gene 5.5-5.7 fusion protein. COMPLETENESS: full length. ;comment1=VALIDATED REFSEQ: This record has undergone validation or preliminary review. The reference sequence was derived from V01146. The sequence was submitted by the authors [1] on magnetic tape and revised according to [3]%2C [4]%2C and [5]. [3] made changes at 8 positions in gene 1 without affecting the size of the total sequence but changing gene 1 amino acids 443%2C 474%2C and 388 to 424. [4] inserted a T at nucleotide 17511%2C increasing the total sequence to 39937 bp. This change%2C originally found in T3 DNA [8]%2C revealed gene 5.9 and shortened gene 6. [5] changed the nucleotides at 11061 and 11062 from GT to TG%2C changing amino acid 119 of T7 lysozyme (gene 3.5) from glycine to valine. 
Features have been extracted from [1] unless otherwise noted. The sequence shown is that of the l strand%2C which corresponds to the sequence of all mRNAs of known functional significance. Early mRNAs are produced by three major promoters for E. coli RNA polymerase A1%2C A2%2C and A3%2C located near the left end of the DNA. A fourth major E. coli promoter%2C A0 (also called D)%2C that would direct transcription leftward%2C and several minor E. coli promoters (see Table 6 in [1]) function in vitro but have no known in vivo function. Late mRNAs are produced by 15 promoters for T7 RNA polymerase distributed across the right-most 85%25 of the DNA%2C and named e.g. phi10%2C for the first gene downstream of the promoter. There are also two T7 promoters%2C phiOL and phiOR%2C associated with possible origins of replication at the left and right ends of T7 DNA. The 23 base-pair consensus sequence for T7 promoters stretches from -17 to +6%2C where the initiating nucleotide is at +1. T7 DNA also contains a 160 base-pair terminal repetition. The beginning and end of RNAs are determined by the promoters%3B by a terminator for E. coli RNA polymerase%2C TE%2C located at the end of the early region%3B a terminator for T7 RNA polymerase%2C Tphi%2C located just downstream of gene 10%3B and a series of RNase III cleavage sites. Early mRNAs made by E. coli RNA polymerase are listed in Features. The many RNAs predicted to be made by T7 RNA polymerase are not listed but can be deduced from the position of the transcription signals (see Tables 8 and 9 in [1]). Promoters are listed in Features by the known or predicted first nucleotide of the RNA%2C terminators by the last nucleotide of the RNA%2C and RNase III sites by the nucleotide 5' of the position of cleavage. Genes are numbered 0.3 to 19.5 in order of their left-to-right position on the genome. Proteins are named by the gene number%2C e.g.%2C the gene 1 protein%2C or by a functional name%2C e.g.%2C T7 RNA polymerase. 
There is now genetic or biochemical evidence that proteins are produced from at least 52 of the 56 T7 genes. Gene 4 produces two proteins%2C 4A and 4B%2C by initiating translation at two different sites in the same reading frame. Gene 10 produces two proteins%2C 10A and 10B%2C by frameshifting during translation. Genes 0.6 and 5.5 probably also make two proteins by translational frameshifting%2C the gene 5.5 frameshift producing a gene 5.5-5.7 fusion protein. COMPLETENESS: full length. ;date=13-AUG-2018;host=Escherichia coli;mol_type=genomic DNA;organism=Escherichia phage T7;
+NC_001604	GenBank	regulatory	224	224	.	+	1	ID=GenBank:regulatory:NC_001604:224:224;Note=E. coli promoter A0 (leftward);regulatory_class=promoter;
+NC_001604	GenBank	regulatory	405	405	.	+	1	ID=GenBank:regulatory:NC_001604:405:405;Note=T7 promoter phiOL;regulatory_class=promoter;
+NC_001604	GenBank	regulatory	498	498	.	+	1	ID=GenBank:regulatory:NC_001604:498:498;Note=E. coli promoter A1;regulatory_class=promoter;
+NC_001604	GenBank	regulatory	626	626	.	+	1	ID=GenBank:regulatory:NC_001604:626:626;Note=E. coli promoter A2;regulatory_class=promoter;
+NC_001604	GenBank	regulatory	750	750	.	+	1	ID=GenBank:regulatory:NC_001604:750:750;Note=E. coli promoter A3;regulatory_class=promoter;
+NC_001604	GenBank	sequence_secondary_structure	890	890	.	+	1	ID=GenBank:sequence_secondary_structure:NC_001604:890:890;Note=RNase III site R0.3;
+NC_001604	GenBank	gene	891	1468	.	+	1	ID=T7p01;Dbxref=GeneID:1261063;Name=T7p01;Note=gene 0.3;
+NC_001604	GenBank	mRNA	891	1468	.	+	1	ID=T7p01.t01;Parent=T7p01;Dbxref=GeneID:1261063;Name=T7p01;product=0.3 mRNA;
+NC_001604	GenBank	CDS	925	1278	.	+	1	ID=T7p01.p01;Parent=T7p01.t01;Dbxref=UniProtKB/Swiss-Prot:P03775,GeneID:1261063;Name=T7p01;Note=gene 0.3%2C inhibits EcoB and EcoK host restriction;codon_start=1;product=hypothetical protein;protein_id=NP_041954.1;transl_table=11;translation=length.117;
+NC_001604	GenBank	exon	891	1468	.	+	1	Parent=T7p01.t01;Name=T7p01;
+NC_001604	GenBank	sequence_secondary_structure	1468	1468	.	+	1	Parent=T7p01;Name=T7p01;Note=RNase III site R0.5;
+NC_001604	GenBank	CDS	1278	1433	.	+	1	ID=T7p02;Dbxref=UniProtKB/Swiss-Prot:P03776,GeneID:1261053;Name=T7p02;Note=gene 0.4;codon_start=1;product=hypothetical protein;protein_id=NP_041955.1;transl_table=11;translation=length.51;
+NC_001604	GenBank	gene	1278	1433	.	+	1	ID=T7p02.gene;Alias=T7p02;Dbxref=GeneID:1261053;Name=T7p02;Note=gene 0.4;
+NC_001604	GenBank	gene	1469	3138	.	+	1	ID=T7p03;Dbxref=GeneID:1261070;Name=T7p03;Note=gene 0.7;
+NC_001604	GenBank	mRNA	1469	3138	.	+	1	ID=T7p03.t01;Parent=T7p03;Dbxref=GeneID:1261070;Name=T7p03;product=0.7 mRNA;
+NC_001604	GenBank	CDS	2021	3100	.	+	1	ID=T7p03.p01;Parent=T7p03.t01;Dbxref=GOA:P00513,UniProtKB/Swiss-Prot:P00513,GeneID:1261070;Name=T7p03;Note=The T7 seryl-threonyl protein kinase gp0.7 is involved in host transcription shutoff and Col Ib exclusion. Phosphorylates E. coli RNA polymerase. Other names: gp0.7.;codon_start=1;product=protein kinase;protein_id=NP_041959.1;transl_table=11;translation=length.359;
+NC_001604	GenBank	exon	1469	3138	.	+	1	Parent=T7p03.t01;Name=T7p03;
+NC_001604	GenBank	regulatory	3113	3113	.	+	1	Parent=T7p03;Name=T7p03;Note=E. coli C promoter;regulatory_class=promoter;
+NC_001604	GenBank	sequence_secondary_structure	3138	3138	.	+	1	Parent=T7p03;Name=T7p03;Note=RNase III site R1;
+NC_001604	GenBank	CDS	1496	1639	.	+	1	ID=T7p04;Dbxref=UniProtKB/Swiss-Prot:P03777,GeneID:1261051;Name=T7p04;Note=gene 0.5;codon_start=1;product=hypothetical protein;protein_id=NP_041956.1;transl_table=11;translation=length.47;
+NC_001604	GenBank	regulatory	1514	1514	.	+	1	Parent=T7p04;Name=T7p04;Note=E. coli B promoter;regulatory_class=promoter;
+NC_001604	GenBank	gene	1496	1639	.	+	1	ID=T7p04.gene;Alias=T7p04;Dbxref=GeneID:1261051;Name=T7p04;Note=gene 0.5;
+NC_001604	GenBank	CDS	1636	1794	.	+	1	ID=T7p05.cds1;Dbxref=UniProtKB/Swiss-Prot:P03778,GeneID:1261061;Name=T7p05;Note=possible gene 0.6B;codon_start=1;product=hypothetical protein;protein_id=NP_041957.1;transl_table=11;translation=length.111;
+NC_001604	GenBank	CDS	1796	1972	.	+	1	ID=T7p05.cds2;Dbxref=UniProtKB/Swiss-Prot:P03778,GeneID:1261061;Name=T7p05;Note=possible gene 0.6B;codon_start=1;product=hypothetical protein;protein_id=NP_041957.1;transl_table=11;translation=length.111;
+NC_001604	GenBank	gene	1636	1972	.	+	1	ID=T7p05.gene;Alias=T7p05;Dbxref=GeneID:1261061;Name=T7p05;Note=possible gene 0.6B;
+NC_001604	GenBank	CDS	1636	1797	.	+	1	ID=T7p06;Dbxref=UniProtKB/Swiss-Prot:P03778,GeneID:1261071;Name=T7p06;Note=gene 0.6A;codon_start=1;product=hypothetical protein;protein_id=NP_041958.1;transl_table=11;translation=length.53;
+NC_001604	GenBank	gene	1636	1797	.	+	1	ID=T7p06.gene;Alias=T7p06;Dbxref=GeneID:1261071;Name=T7p06;Note=gene 0.6A;
+NC_001604	GenBank	gene	3139	5887	.	+	1	ID=T7p07;Dbxref=GeneID:1261050;Name=T7p07;Note=gene 1;
+NC_001604	GenBank	mRNA	3139	5887	.	+	1	ID=T7p07.t01;Parent=T7p07;Dbxref=GeneID:1261050;Name=T7p07;product=1 mRNA;
+NC_001604	GenBank	CDS	3171	5822	.	+	1	ID=T7p07.p01;Parent=T7p07.t01;Dbxref=GOA:P00573,UniProtKB/Swiss-Prot:P00573,GeneID:1261050;Name=T7p07;Note=A family of single subunit RNA polymerases.;codon_start=1;product=T3/T7-like RNA polymerase;protein_id=NP_041960.1;transl_table=11;translation=length.883;
+NC_001604	GenBank	exon	3139	5887	.	+	1	Parent=T7p07.t01;Name=T7p07;
+NC_001604	GenBank	regulatory	5848	5848	.	+	1	Parent=T7p07;Name=T7p07;Note=T7 promoter phi1.1A;regulatory_class=promoter;
+NC_001604	GenBank	sequence_secondary_structure	5887	5887	.	+	1	Parent=T7p07;Name=T7p07;Note=RNase III site R1.1;
+NC_001604	GenBank	gene	5888	6448	.	+	1	ID=T7p08;Dbxref=GeneID:1261049;Name=T7p08;Note=gene 1.2;
+NC_001604	GenBank	mRNA	5888	6448	.	+	1	ID=T7p08.t01;Parent=T7p08;Dbxref=GeneID:1261049;Name=T7p08;product=1.1 mRNA;
+NC_001604	GenBank	CDS	6137	6394	.	+	1	ID=T7p08.p01;Parent=T7p08.t01;Dbxref=GOA:P03780,UniProtKB/Swiss-Prot:P03780,GeneID:1261049;Name=T7p08;Note=inhibits activity of the host dGTPase [dgt]. Essential only in strains that overexpress dGTPase [optA1 mutation]. In T7%2C gp1.2 also causes F plasmid exclusion. In T3%2C however%2C gp1.2 overcomes the exclusion system. Other names: dGTP triphosphohydrolase inhibitor%3B gp1.2.;codon_start=1;product=host dGTPase inhibitor;protein_id=NP_041962.1;transl_table=11;translation=length.85;
+NC_001604	GenBank	exon	5888	6448	.	+	1	Parent=T7p08.t01;Name=T7p08;
+NC_001604	GenBank	regulatory	5923	5923	.	+	1	Parent=T7p08;Name=T7p08;Note=T7 promoter phi1.1B;regulatory_class=promoter;
+NC_001604	GenBank	regulatory	6409	6409	.	+	1	Parent=T7p08;Name=T7p08;Note=T7 promoter phi1.3;regulatory_class=promoter;
+NC_001604	GenBank	sequence_secondary_structure	6448	6448	.	+	1	Parent=T7p08;Name=T7p08;Note=RNase III site R1.3;
+NC_001604	GenBank	CDS	6007	6135	.	+	1	ID=T7p09;Dbxref=UniProtKB/Swiss-Prot:P03779,GeneID:1261072;Name=T7p09;Note=other names: gp1.1;codon_start=1;product=hypothetical protein;protein_id=NP_041961.1;transl_table=11;translation=length.42;
+NC_001604	GenBank	gene	6007	6135	.	+	1	ID=T7p09.gene;Alias=T7p09;Dbxref=GeneID:1261072;Name=T7p09;Note=gene 1.1;
+NC_001604	GenBank	gene	6449	7588	.	+	1	ID=T7p10;Dbxref=GeneID:1261055;Name=T7p10;Note=gene 1.3;
+NC_001604	GenBank	mRNA	6449	7588	.	+	1	ID=T7p10.t01;Parent=T7p10;Dbxref=GeneID:1261055;Name=T7p10;product=1.3 mRNA;
+NC_001604	GenBank	CDS	6475	7554	.	+	1	ID=T7p10.p01;Parent=T7p10.t01;Dbxref=GOA:P00969,UniProtKB/Swiss-Prot:P00969,GeneID:1261055;Name=T7p10;Note=Catalyzes the ATP-dependent formation of a phosphodiester bond at the site of single-stranded breaks in double-stranded DNA. T7 ligase is essential in ligase-deficient hosts only.;codon_start=1;product=ATP-dependent DNA ligase;protein_id=NP_041963.1;transl_table=11;translation=length.359;
+NC_001604	GenBank	exon	6449	7588	.	+	1	Parent=T7p10.t01;Name=T7p10;
+NC_001604	GenBank	regulatory	7588	7588	.	+	1	Parent=T7p10;Name=T7p10;Note=E. coli transcription terminator TE;regulatory_class=terminator;
+NC_001604	GenBank	CDS	7608	7763	.	+	1	ID=T7p11;Dbxref=UniProtKB/Swiss-Prot:P03791,GeneID:1261075;Name=T7p11;Note=gene 1.4;codon_start=1;product=hypothetical protein;protein_id=NP_041964.1;transl_table=11;translation=length.51;
+NC_001604	GenBank	regulatory	7778	7778	.	+	1	ID=GenBank:regulatory:NC_001604:7778:7778;Note=T7 promoter phi1.5;regulatory_class=promoter;
+NC_001604	GenBank	gene	7608	7763	.	+	1	ID=T7p11.gene;Alias=T7p11;Dbxref=GeneID:1261075;Name=T7p11;Note=gene 1.4;
+NC_001604	GenBank	CDS	7791	7880	.	+	1	ID=T7p12;Dbxref=UniProtKB/Swiss-Prot:P03792,GeneID:1261074;Name=T7p12;Note=gene 1.5;codon_start=1;product=hypothetical protein;protein_id=NP_041965.1;transl_table=11;translation=length.29;
+NC_001604	GenBank	regulatory	7895	7895	.	+	1	ID=GenBank:regulatory:NC_001604:7895:7895;Note=T7 promoter phi1.6;regulatory_class=promoter;
+NC_001604	GenBank	gene	7791	7880	.	+	1	ID=T7p12.gene;Alias=T7p12;Dbxref=GeneID:1261074;Name=T7p12;Note=gene 1.5;
+NC_001604	GenBank	CDS	7906	8166	.	+	1	ID=T7p13;Dbxref=UniProtKB/Swiss-Prot:P03793,GeneID:1261076;Name=T7p13;Note=gene 1.6;codon_start=1;product=hypothetical protein;protein_id=NP_041966.1;transl_table=11;translation=length.86;
+NC_001604	GenBank	gene	7906	8166	.	+	1	ID=T7p13.gene;Alias=T7p13;Dbxref=GeneID:1261076;Name=T7p13;Note=gene 1.6;
+NC_001604	GenBank	CDS	8166	8756	.	+	1	ID=T7p14;Dbxref=UniProtKB/Swiss-Prot:P03781,GeneID:1261060;Name=T7p14;Note=gene 1.7;codon_start=1;product=hypothetical protein;protein_id=NP_041967.1;transl_table=11;translation=length.196;
+NC_001604	GenBank	gene	8166	8756	.	+	1	ID=T7p14.gene;Alias=T7p14;Dbxref=GeneID:1261060;Name=T7p14;Note=gene 1.7;
+NC_001604	GenBank	CDS	8749	8895	.	+	1	ID=T7p15;Dbxref=UniProtKB/Swiss-Prot:P03794,GeneID:1261054;Name=T7p15;Note=not essential in T7. Other names: gp1.8;codon_start=1;product=hypothetical protein;protein_id=NP_041968.1;transl_table=11;translation=length.48;
+NC_001604	GenBank	gene	8749	8895	.	+	1	ID=T7p15.gene;Alias=T7p15;Dbxref=GeneID:1261054;Name=T7p15;Note=gene 1.8;
+NC_001604	GenBank	CDS	8898	9092	.	+	1	ID=T7p16;Dbxref=UniProtKB/Swiss-Prot:P03704,GeneID:1261073;Name=T7p16;Note=T7 RNA polymerase inhibitor binds to host RNA pol and suppresses its activity on a subset of promoters. gp2 deficient T7 display reduced DNA replication and premature breakdown of replicating DNA%2C specifically at the left end of the genome%2C along with the presence of empty proheads. Rifampin can compensate for the missing gp2 function. Other names: gp2.;codon_start=1;product=inhibitor of host bacterial RNA polymerase;protein_id=NP_041969.1;transl_table=11;translation=length.64;
+NC_001604	GenBank	regulatory	9107	9107	.	+	1	ID=GenBank:regulatory:NC_001604:9107:9107;Note=T7 promoter phi2.5;regulatory_class=promoter;
+NC_001604	GenBank	gene	8898	9092	.	+	1	ID=T7p16.gene;Alias=T7p16;Dbxref=GeneID:1261073;Name=T7p16;Note=gene 2;
+NC_001604	GenBank	CDS	9158	9856	.	+	1	ID=T7p17;Dbxref=GOA:P03696,UniProtKB/Swiss-Prot:P03696,GeneID:1261080;Name=T7p17;Note=binds single-stranded DNA. In phage T7 gp2.5 is essential for DNA replication and recombination. Other names: gp2.5%3B SSB.;codon_start=1;product=single-stranded DNA-binding protein;protein_id=NP_041970.1;transl_table=11;translation=length.232;
+NC_001604	GenBank	gene	9158	9856	.	+	1	ID=T7p17.gene;Alias=T7p17;Dbxref=GeneID:1261080;Name=T7p17;Note=gene 2.5;
+NC_001604	GenBank	CDS	9857	10276	.	+	1	ID=T7p18;Dbxref=GOA:P03795,UniProtKB/Swiss-Prot:P03795,GeneID:1261078;Name=T7p18;Note=gene 2.8;codon_start=1;product=hypothetical protein;protein_id=NP_041971.1;transl_table=11;translation=length.139;
+NC_001604	GenBank	gene	9857	10276	.	+	1	ID=T7p18.gene;Alias=T7p18;Dbxref=GeneID:1261078;Name=T7p18;Note=gene 2.8;
+NC_001604	GenBank	CDS	10257	10706	.	+	1	ID=T7p19;Dbxref=GOA:P00641,UniProtKB/Swiss-Prot:P00641,GeneID:1261079;Name=T7p19;Note=T7 endonuclease I is a Holliday junction resolvase encoded by T7 gene 3. Mutants in gene 3 are defective in recombination and accumulate branched DNA. Endonuclease I may also play a role in the degradation of the host genome following infection with T7.;codon_start=1;product=endonuclease I;protein_id=NP_041972.1;transl_table=11;translation=length.149;
+NC_001604	GenBank	gene	10257	10706	.	+	1	ID=T7p19.gene;Alias=T7p19;Dbxref=GeneID:1261079;Name=T7p19;Note=gene 3;
+NC_001604	GenBank	CDS	10706	11161	.	+	1	ID=T7p20;Dbxref=GOA:P00806,UniProtKB/Swiss-Prot:P00806,GeneID:1261077;Name=T7p20;Note=T7 lysozyme hydrolyzes an amide bond in the host cell wall following its release from the cytoplasm. In addition%2C T7 lysozyme inhibits T7 RNA polymerase initiation. This inhibition is greater for class II promoters than class III promoters and therefore may aid in temporal regulation of transcription and the switch to particle assembly. In T7%2C lysozyme%2C unlike the T7 holin%2C is expressed with and lies in the same region as the replication genes. Lack of gp3.5 reduces replication and burst size and delays%2C but does not completely prevent lysis. Mutations in the muralytic domain of gene 16%2C an inner capsid protein%2C can partially compensate for a deletion of gp3.5. Other names: gp3.5%3B amidase%3B N-acetylmuramoyl-L-alanine amidase;codon_start=1;product=lysozyme;protein_id=NP_041973.1;transl_table=11;translation=length.151;
+NC_001604	GenBank	regulatory	11180	11180	.	+	1	ID=GenBank:regulatory:NC_001604:11180:11180;Note=T7 promoter phi3.8;regulatory_class=promoter;
+NC_001604	GenBank	sequence_secondary_structure	11203	11203	.	+	1	ID=GenBank:sequence_secondary_structure:NC_001604:11203:11203;Note=possible RNase III site R3.8;
+NC_001604	GenBank	gene	10706	11161	.	+	1	ID=T7p20.gene;Alias=T7p20;Dbxref=GeneID:1261077;Name=T7p20;Note=gene 3.5;
+NC_001604	GenBank	CDS	11225	11590	.	+	1	ID=T7p21;Dbxref=GOA:P03797,UniProtKB/Swiss-Prot:P03797,GeneID:1261065;Name=T7p21;Note=gene 3.8;codon_start=1;product=putative NHN endonuclease;protein_id=NP_041974.1;transl_table=11;translation=length.121;
+NC_001604	GenBank	gene	11225	11590	.	+	1	ID=T7p21.gene;Alias=T7p21;Dbxref=GeneID:1261065;Name=T7p21;Note=gene 3.8;
+NC_001604	GenBank	CDS	11565	13265	.	+	1	ID=T7p22;Dbxref=GOA:P03692,UniProtKB/Swiss-Prot:P03692,GeneID:1261046;Name=T7p22;Note=gene 4A%2C primase/helicase [14%2C15];codon_start=1;product=DNA primase/helicase;protein_id=NP_041975.1;transl_table=11;translation=length.566;
+NC_001604	GenBank	gene	11565	13265	.	+	1	ID=T7p22.gene;Alias=T7p22;Dbxref=GeneID:1261046;Name=T7p22;Note=gene 4A;
+NC_001604	GenBank	CDS	11635	11757	.	+	1	ID=T7p23;Dbxref=UniProtKB/Swiss-Prot:P03782,GeneID:1261047;Name=T7p23;Note=gene 4.1;codon_start=1;product=hypothetical protein;protein_id=NP_041976.1;transl_table=11;translation=length.40;
+NC_001604	GenBank	gene	11635	11757	.	+	1	ID=T7p23.gene;Alias=T7p23;Dbxref=GeneID:1261047;Name=T7p23;Note=gene 4.1;
+NC_001604	GenBank	CDS	11754	13265	.	+	1	ID=T7p24;Dbxref=GOA:P03692,UniProtKB/Swiss-Prot:P03692,GeneID:1261048;Name=T7p24;Note=gene 4B/helicase [14%2C15];codon_start=1;product=helicase;protein_id=NP_041977.1;transl_table=11;translation=length.503;
+NC_001604	GenBank	regulatory	12671	12671	.	+	1	Parent=T7p24;Name=T7p24;Note=T7 promoter phi4c;regulatory_class=promoter;
+NC_001604	GenBank	gene	11754	13265	.	+	1	ID=T7p24.gene;Alias=T7p24;Dbxref=GeneID:1261048;Name=T7p24;Note=gene 4B;
+NC_001604	GenBank	CDS	12988	13326	.	+	1	ID=T7p25;Dbxref=UniProtKB/Swiss-Prot:P03783,GeneID:1261021;Name=T7p25;Note=gene 4.2;codon_start=1;product=hypothetical protein;protein_id=NP_041978.1;transl_table=11;translation=length.112;
+NC_001604	GenBank	regulatory	13341	13341	.	+	1	ID=GenBank:regulatory:NC_001604:13341:13341;Note=T7 promoter phi4.3;regulatory_class=promoter;
+NC_001604	GenBank	gene	12988	13326	.	+	1	ID=T7p25.gene;Alias=T7p25;Dbxref=GeneID:1261021;Name=T7p25;Note=gene 4.2;
+NC_001604	GenBank	CDS	13352	13564	.	+	1	ID=T7p26;Dbxref=UniProtKB/Swiss-Prot:P03784,GeneID:1261069;Name=T7p26;Note=not essential in T7%3B Other names: gp4.3.;codon_start=1;product=hypothetical protein;protein_id=NP_041979.1;transl_table=11;translation=length.70;
+NC_001604	GenBank	gene	13352	13564	.	+	1	ID=T7p26.gene;Alias=T7p26;Dbxref=GeneID:1261069;Name=T7p26;Note=gene 4.3;
+NC_001604	GenBank	CDS	13584	13853	.	+	1	ID=T7p27;Dbxref=UniProtKB/Swiss-Prot:P03785,GeneID:1261059;Name=T7p27;Note=not essential in T7. Other names: gp4.5.;codon_start=1;product=hypothetical protein;protein_id=NP_041980.1;transl_table=11;translation=length.89;
+NC_001604	GenBank	sequence_secondary_structure	13892	13892	.	+	1	ID=GenBank:sequence_secondary_structure:NC_001604:13892:13892;Note=RNase III site R4.7;
+NC_001604	GenBank	regulatory	13915	13915	.	+	1	ID=GenBank:regulatory:NC_001604:13915:13915;Note=T7 promoter phi4.7;regulatory_class=promoter;
+NC_001604	GenBank	gene	13584	13853	.	+	1	ID=T7p27.gene;Alias=T7p27;Dbxref=GeneID:1261059;Name=T7p27;Note=gene 4.5;
+NC_001604	GenBank	CDS	13927	14334	.	+	1	ID=T7p28;Dbxref=UniProtKB/Swiss-Prot:P03786,GeneID:1261043;Name=T7p28;Note=gene 4.7;codon_start=1;product=hypothetical protein;protein_id=NP_041981.1;transl_table=11;translation=length.135;
+NC_001604	GenBank	gene	13927	14334	.	+	1	ID=T7p28.gene;Alias=T7p28;Dbxref=GeneID:1261043;Name=T7p28;Note=gene 4.7;
+NC_001604	GenBank	CDS	14353	16467	.	+	1	ID=T7p29;Dbxref=GOA:P00581,UniProtKB/Swiss-Prot:P00581,GeneID:1261044;Name=T7p29;Note=gene 5;codon_start=1;product=DNA polymerase;protein_id=NP_041982.1;transl_table=11;translation=length.704;
+NC_001604	GenBank	gene	14353	16467	.	+	1	ID=T7p29.gene;Alias=T7p29;Dbxref=GeneID:1261044;Name=T7p29;Note=gene 5;
+NC_001604	GenBank	CDS	16483	16839	.	+	1	ID=T7p30;Dbxref=UniProtKB/Swiss-Prot:P03798,GeneID:1261045;Name=T7p30;Note=gene 5.3;codon_start=1;product=hypothetical protein;protein_id=NP_041983.1;transl_table=11;translation=length.118;
+NC_001604	GenBank	gene	16483	16839	.	+	1	ID=T7p30.gene;Alias=T7p30;Dbxref=GeneID:1261045;Name=T7p30;Note=gene 5.3;
+NC_001604	GenBank	CDS	16851	17147	.	+	1	ID=T7p31.cds1;Dbxref=GOA:P03787,UniProtKB/Swiss-Prot:P03787,GeneID:1261041;Name=T7p31;Note=possible gene 5.5-5.7;codon_start=1;product=hypothetical protein;protein_id=NP_041984.1;transl_table=11;translation=length.169;
+NC_001604	GenBank	CDS	17147	17359	.	+	1	ID=T7p31.cds2;Dbxref=GOA:P03787,UniProtKB/Swiss-Prot:P03787,GeneID:1261041;Name=T7p31;Note=possible gene 5.5-5.7;codon_start=1;product=hypothetical protein;protein_id=NP_041984.1;transl_table=11;translation=length.169;
+NC_001604	GenBank	gene	16851	17359	.	+	1	ID=T7p31.gene;Alias=T7p31;Dbxref=GeneID:1261041;Name=T7p31;Note=possible gene 5.5-5.7;
+NC_001604	GenBank	CDS	16851	17150	.	+	1	ID=T7p32;Dbxref=GOA:P03787,UniProtKB/Swiss-Prot:P03787,GeneID:1261038;Name=T7p32;Note=in Enterobacteria phage T7%2C gp5.5 abolishes E. coli nucleoid protein H-NS-mediated inhibition of transcription by T7 RNA polymerases in vitro. Not essential%2C but mutants have lower burst size. Mutants in this gene are not capable of replicating in phage lambda lysogens. Other names: gp5.5;codon_start=1;product=host protein H-NS-interacting protein;protein_id=NP_041985.1;transl_table=11;translation=length.99;
+NC_001604	GenBank	gene	16851	17150	.	+	1	ID=T7p32.gene;Alias=T7p32;Dbxref=GeneID:1261038;Name=T7p32;Note=gene 5.5;
+NC_001604	GenBank	CDS	17150	17359	.	+	1	ID=T7p33;Dbxref=GOA:P03787,UniProtKB/Swiss-Prot:P03787,GeneID:1261040;Name=T7p33;Note=gene 5.7;codon_start=1;product=hypothetical protein;protein_id=NP_041986.1;transl_table=11;translation=length.69;
+NC_001604	GenBank	gene	17150	17359	.	+	1	ID=T7p33.gene;Alias=T7p33;Dbxref=GeneID:1261040;Name=T7p33;Note=gene 5.7;
+NC_001604	GenBank	CDS	17359	17517	.	+	1	ID=T7p34;Dbxref=UniProtKB/Swiss-Prot:P20406,GeneID:1261037;Name=T7p34;Note=not essential. Other names: gp5.9%3B exonuclease V inhibitor;codon_start=1;product=host recBCD nuclease inhibitor;protein_id=NP_041987.1;transl_table=11;translation=length.52;
+NC_001604	GenBank	gene	17359	17517	.	+	1	ID=T7p34.gene;Alias=T7p34;Dbxref=GeneID:1261037;Name=T7p34;Note=gene 5.9;
+NC_001604	GenBank	CDS	17504	18406	.	+	1	ID=T7p35;Dbxref=GOA:P00638,UniProtKB/Swiss-Prot:P00638,GeneID:1261052;Name=T7p35;Note=The T7 exonuclease encoded by gene 6 is required for (a) recombination and (b) for the degradation of host chromosomal DNA. The latter process provides nucleotides for phage DNA replication. Both processes are carried out together with the T7 gene 3-encoded endonuclease/Holliday junction resolvase. In addition%2C the exonuclease also functions as an RNase H that removes RNA primers during DNA replication and promotes concatemer formation.;codon_start=1;product=exonuclease;protein_id=NP_041988.1;transl_table=11;translation=length.300;
+NC_001604	GenBank	gene	17504	18406	.	+	1	ID=T7p35.gene;Alias=T7p35;Dbxref=GeneID:1261052;Name=T7p35;Note=gene 6;
+NC_001604	GenBank	CDS	18394	18507	.	+	1	ID=T7p36;Dbxref=UniProtKB/Swiss-Prot:P03799,GeneID:1261058;Name=T7p36;Note=gene 6.3;codon_start=1;product=hypothetical protein;protein_id=NP_041989.1;transl_table=11;translation=length.37;
+NC_001604	GenBank	regulatory	18545	18545	.	+	1	ID=GenBank:regulatory:NC_001604:18545:18545;Note=T7 promoter phi6.5;regulatory_class=promoter;
+NC_001604	GenBank	sequence_secondary_structure	18563	18563	.	+	1	ID=GenBank:sequence_secondary_structure:NC_001604:18563:18563;Note=RNase III site R6.5;
+NC_001604	GenBank	gene	18394	18507	.	+	1	ID=T7p36.gene;Alias=T7p36;Dbxref=GeneID:1261058;Name=T7p36;Note=gene 6.3;
+NC_001604	GenBank	CDS	18605	18859	.	+	1	ID=T7p37;Dbxref=UniProtKB/Swiss-Prot:P03800,GeneID:1261036;Name=T7p37;Note=gene 6.5;codon_start=1;product=hypothetical protein;protein_id=NP_041990.1;transl_table=11;translation=length.84;
+NC_001604	GenBank	gene	18605	18859	.	+	1	ID=T7p37.gene;Alias=T7p37;Dbxref=GeneID:1261036;Name=T7p37;Note=gene 6.5;
+NC_001604	GenBank	CDS	18864	19130	.	+	1	ID=T7p38;Dbxref=UniProtKB/Swiss-Prot:P03801,GeneID:1261039;Name=T7p38;Note=may be involved in virion morphogenesis and is injected from virion into host cell. Other names: gp6.7;codon_start=1;product=hypothetical protein;protein_id=NP_041991.1;transl_table=11;translation=length.88;
+NC_001604	GenBank	gene	18864	19130	.	+	1	ID=T7p38.gene;Alias=T7p38;Dbxref=GeneID:1261039;Name=T7p38;Note=gene 6.7;
+NC_001604	GenBank	CDS	19130	19531	.	+	1	ID=T7p39;Dbxref=UniProtKB/Swiss-Prot:P03750,GeneID:1261056;Name=T7p39;Note=gene 7%2C host range;codon_start=1;product=hypothetical protein;protein_id=NP_041992.1;transl_table=11;translation=length.133;
+NC_001604	GenBank	gene	19130	19531	.	+	1	ID=T7p39.gene;Alias=T7p39;Dbxref=GeneID:1261056;Name=T7p39;Note=gene 7;
+NC_001604	GenBank	CDS	19535	19834	.	+	1	ID=T7p40;Dbxref=UniProtKB/Swiss-Prot:P03751,GeneID:1261035;Name=T7p40;Note=required for virion infectivity but not morphogenesis. In T7%2C gp 7.3 appears to be required for the assembly of tail fibers on capsids. Other names: gp7.3;codon_start=1;product=tail assembly protein;protein_id=NP_041993.1;transl_table=11;translation=length.99;
+NC_001604	GenBank	gene	19535	19834	.	+	1	ID=T7p40.gene;Alias=T7p40;Dbxref=GeneID:1261035;Name=T7p40;Note=gene 7.3;
+NC_001604	GenBank	CDS	19848	20240	.	+	1	ID=T7p41;Dbxref=GOA:P03796,UniProtKB/Swiss-Prot:P03796,GeneID:1261028;Name=T7p41;Note=gene 7.7;codon_start=1;product=hypothetical protein;protein_id=NP_041994.1;transl_table=11;translation=length.130;
+NC_001604	GenBank	gene	19848	20240	.	+	1	ID=T7p41.gene;Alias=T7p41;Dbxref=GeneID:1261028;Name=T7p41;Note=gene 7.7;
+NC_001604	GenBank	CDS	20240	21850	.	+	1	ID=T7p42;Dbxref=GOA:P03728,UniProtKB/Swiss-Prot:P03728,GeneID:1261033;Name=T7p42;Note=gene 8;codon_start=1;product=head-tail connector protein;protein_id=NP_041995.1;transl_table=11;translation=length.536;
+NC_001604	GenBank	regulatory	21865	21865	.	+	1	ID=GenBank:regulatory:NC_001604:21865:21865;Note=T7 promoter phi9;regulatory_class=promoter;
+NC_001604	GenBank	gene	20240	21850	.	+	1	ID=T7p42.gene;Alias=T7p42;Dbxref=GeneID:1261033;Name=T7p42;Note=gene 8;
+NC_001604	GenBank	CDS	21950	22873	.	+	1	ID=T7p43;Dbxref=GOA:P03716,UniProtKB/Swiss-Prot:P03716,GeneID:1261027;Name=T7p43;Note=Phage T7-like scaffolding protein. The protein is encoded by gene 9 in T7 (gp9) and is required for the formation of pro-capsids.;codon_start=1;product=capsid assembly protein;protein_id=NP_041996.1;transl_table=11;translation=length.307;
+NC_001604	GenBank	regulatory	22904	22904	.	+	1	ID=GenBank:regulatory:NC_001604:22904:22904;Note=T7 promoter phi10;regulatory_class=promoter;
+NC_001604	GenBank	gene	21950	22873	.	+	1	ID=T7p43.gene;Alias=T7p43;Dbxref=GeneID:1261027;Name=T7p43;Note=gene 9;
+NC_001604	GenBank	CDS	22967	23989	.	+	1	ID=T7p44.cds1;Dbxref=GOA:P19727,UniProtKB/Swiss-Prot:P19727,GeneID:1261029;Name=T7p44;Note=major capsid protein. Involved in F-exclusion of wt T7 phage. A minor capsid protein (gp10B) is produced from gene 10 by a -1 frameshift towards the end of 10A%2C resulting in a slightly larger protein. Other names: gp10A.;codon_start=1;product=major capsid protein;protein_id=NP_041997.1;transl_table=11;translation=length.398;
+NC_001604	GenBank	CDS	23989	24162	.	+	1	ID=T7p44.cds2;Dbxref=GOA:P19727,UniProtKB/Swiss-Prot:P19727,GeneID:1261029;Name=T7p44;Note=major capsid protein. Involved in F-exclusion of wt T7 phage. A minor capsid protein (gp10B) is produced from gene 10 by a -1 frameshift towards the end of 10A%2C resulting in a slightly larger protein. Other names: gp10A.;codon_start=1;product=major capsid protein;protein_id=NP_041997.1;transl_table=11;translation=length.398;
+NC_001604	GenBank	gene	22967	24162	.	+	1	ID=T7p44.gene;Alias=T7p44;Dbxref=GeneID:1261029;Name=T7p44;Note=gene 10B;
+NC_001604	GenBank	CDS	22967	24004	.	+	1	ID=T7p45;Dbxref=GOA:P19726,UniProtKB/Swiss-Prot:P19726,GeneID:1261026;Name=T7p45;Note=major capsid protein. Involved in F-exclusion of wt T7 phage. A minor capsid protein (gp10B) is produced from gene 10 by a -1 frameshift towards the end of 10A%2C resulting in a slightly larger protein. Other names: gp10A.;codon_start=1;product=major capsid protein;protein_id=NP_041998.1;transl_table=11;translation=length.345;
+NC_001604	GenBank	regulatory	24210	24210	.	+	1	ID=GenBank:regulatory:NC_001604:24210:24210;Note=T7 transcription terminator Tphi;regulatory_class=terminator;
+NC_001604	GenBank	gene	22967	24004	.	+	1	ID=T7p45.gene;Alias=T7p45;Dbxref=GeneID:1261026;Name=T7p45;Note=gene 10A;
+NC_001604	GenBank	CDS	24228	24818	.	+	1	ID=T7p46;Dbxref=UniProtKB/Swiss-Prot:P03746,GeneID:1261030;Name=T7p46;Note=Tail tubular proteins A and B are required for assembly of tails of T7-like phages.;codon_start=1;product=tail tubular protein A;protein_id=NP_041999.1;transl_table=11;translation=length.196;
+NC_001604	GenBank	gene	24228	24818	.	+	1	ID=T7p46.gene;Alias=T7p46;Dbxref=GeneID:1261030;Name=T7p46;Note=gene 11;
+NC_001604	GenBank	CDS	24842	27226	.	+	1	ID=T7p47;Dbxref=UniProtKB/Swiss-Prot:P03747,GeneID:1261024;Name=T7p47;Note=gene 12;codon_start=1;product=tail tubular protein B;protein_id=NP_042000.1;transl_table=11;translation=length.794;
+NC_001604	GenBank	regulatory	27274	27274	.	+	1	ID=GenBank:regulatory:NC_001604:27274:27274;Note=T7 promoter phi13;regulatory_class=promoter;
+NC_001604	GenBank	sequence_secondary_structure	27281	27281	.	+	1	ID=GenBank:sequence_secondary_structure:NC_001604:27281:27281;Note=possible RNase III site R13;
+NC_001604	GenBank	gene	24842	27226	.	+	1	ID=T7p47.gene;Alias=T7p47;Dbxref=GeneID:1261024;Name=T7p47;Note=gene 12;
+NC_001604	GenBank	CDS	27307	27723	.	+	1	ID=T7p48;Dbxref=UniProtKB/Swiss-Prot:P03723,GeneID:1261025;Name=T7p48;Note=gene 13;codon_start=1;product=internal virion protein A;protein_id=NP_042001.1;transl_table=11;translation=length.138;
+NC_001604	GenBank	gene	27307	27723	.	+	1	ID=T7p48.gene;Alias=T7p48;Dbxref=GeneID:1261025;Name=T7p48;Note=gene 13;
+NC_001604	GenBank	CDS	27728	28318	.	+	1	ID=T7p49;Dbxref=UniProtKB/Swiss-Prot:P03724,GeneID:1261032;Name=T7p49;Note=Approximately 12 copies of the internal virion protein B encoded in phage T7 by gene 14 are part of the internal core of the T7 virion. Along with gp16 and gp15%2C the other internal core proteins%2C gp14 is ejected from the phage head and forms part of a putative channel that spans the entire host cell envelope and allows entry of DNA. gp14 appears to localize to the outer host membrane after ejection. Other names: gp14;codon_start=1;product=internal virion protein B;protein_id=NP_042002.1;transl_table=11;translation=length.196;
+NC_001604	GenBank	gene	27728	28318	.	+	1	ID=T7p49.gene;Alias=T7p49;Dbxref=GeneID:1261032;Name=T7p49;Note=gene 14;
+NC_001604	GenBank	CDS	28325	30568	.	+	1	ID=T7p50;Dbxref=UniProtKB/Swiss-Prot:P03725,GeneID:1261034;Name=T7p50;Note=Approximately 12 copies of the internal virion protein C encoded by phage T7 gene 15 (gp15) are part of the internal core of the T7 virion. Along with gp14 and gp16%2C the other internal core proteins%2C gp15 is ejected from the phage head and forms part of a putative channel that spans the entire host cell envelope and allows entry of DNA.;codon_start=1;product=internal virion protein C;protein_id=NP_042003.1;transl_table=11;translation=length.747;
+NC_001604	GenBank	gene	28325	30568	.	+	1	ID=T7p50.gene;Alias=T7p50;Dbxref=GeneID:1261034;Name=T7p50;Note=gene 15;
+NC_001604	GenBank	CDS	30595	34551	.	+	1	ID=T7p51;Dbxref=GOA:P03726,UniProtKB/Swiss-Prot:P03726,GeneID:1261031;Name=T7p51;Note=Approximately 3 copies of the internal virion protein D encoded by phage T7 gene 16 (gp16) are part of the internal core of the T7 virion. Along with gp14 and gp15%2C the other internal core proteins%2C gp16 is ejected from the phage head and forms part of a putative channel that spans the entire host cell envelope and allows entry of DNA. The N-terminus has similarity to a lytic transglycosylase and may help form a channel for phage DNA translocation through the crosslinked peptidoglycan layer of the host envelope.;codon_start=1;product=internal virion protein D;protein_id=NP_042004.1;transl_table=11;translation=length.1318;
+NC_001604	GenBank	regulatory	34566	34566	.	+	1	ID=GenBank:regulatory:NC_001604:34566:34566;Note=T7 promoter phi17;regulatory_class=promoter;
+NC_001604	GenBank	gene	30595	34551	.	+	1	ID=T7p51.gene;Alias=T7p51;Dbxref=GeneID:1261031;Name=T7p51;Note=gene 16;
+NC_001604	GenBank	CDS	34624	36285	.	+	1	ID=T7p52;Dbxref=UniProtKB/Swiss-Prot:P03748,GeneID:1261023;Name=T7p52;Note=in phages T7 and T3 trimers of gp17 form each of the 6 kinked tail fibers. Other names: gp17.;codon_start=1;product=tail fiber protein;protein_id=NP_042005.1;transl_table=11;translation=length.553;
+NC_001604	GenBank	gene	34624	36285	.	+	1	ID=T7p52.gene;Alias=T7p52;Dbxref=GeneID:1261023;Name=T7p52;Note=gene 17;
+NC_001604	GenBank	CDS	36344	36547	.	+	1	ID=T7p53;Dbxref=GOA:P03802,UniProtKB/Swiss-Prot:P03802,GeneID:1261022;Name=T7p53;Note=Type II holins have two putative transmembrane domains and are thought to allow endolysins access to the cell wall at the optimal lysis time. However%2C in phage T7 the holin protein gp17.5 does not appear to be essential and gp17.5 mutants only show a minor delay in lysis. Other names: gp17.5%3B lysis protein;codon_start=1;product=type II holin;protein_id=NP_042006.1;transl_table=11;translation=length.67;
+NC_001604	GenBank	gene	36344	36547	.	+	1	ID=T7p53.gene;Alias=T7p53;Dbxref=GeneID:1261022;Name=T7p53;Note=gene 17.5;
+NC_001604	GenBank	CDS	36553	36822	.	+	1	ID=T7p54;Dbxref=GOA:P03693,UniProtKB/Swiss-Prot:P03693,GeneID:1261042;Name=T7p54;Note=involved in the packaging of genome monomers into a procapsid using head-to-tail concatemers of genomes. other names: DNA packaging protein A%3B DNA maturation protein A%3B terminase%2C small subunit;codon_start=1;product=DNA packaging protein%2C small subunit;protein_id=NP_042007.1;transl_table=11;translation=length.89;
+NC_001604	GenBank	regulatory	36836	36836	.	+	1	ID=GenBank:regulatory:NC_001604:36836:36836;Note=E. coli promoter E[6];regulatory_class=promoter;
+NC_001604	GenBank	sequence_secondary_structure	36856	36856	.	+	1	ID=GenBank:sequence_secondary_structure:NC_001604:36856:36856;Note=RNase III site R18.5;
+NC_001604	GenBank	gene	36553	36822	.	+	1	ID=T7p54.gene;Alias=T7p54;Dbxref=GeneID:1261042;Name=T7p54;Note=gene 18;
+NC_001604	GenBank	CDS	36917	37348	.	+	1	ID=T7p55;Dbxref=GOA:P03803,UniProtKB/Swiss-Prot:P03803,GeneID:1261067;Name=T7p55;Note=analog of phage lambda protein Rz%2C a cell lysis protein. Rz and gp18.5 share distant sequence similarity%2C similar function%2C and a similar genome neighborhood. In T7%2C gp18.5 interacts with gp18.7%2C a lambda RZ1-like lysis protein. Other names: gp18.5;codon_start=1;product=phage lambda Rz-like lysis protein;protein_id=NP_042008.1;transl_table=11;translation=length.143;
+NC_001604	GenBank	gene	36917	37348	.	+	1	ID=T7p55.gene;Alias=T7p55;Dbxref=GeneID:1261067;Name=T7p55;Note=gene 18.5;
+NC_001604	GenBank	CDS	37032	37283	.	+	1	ID=T7p56;Dbxref=UniProtKB/Swiss-Prot:P03788,GeneID:1261057;Name=T7p56;Note=in Enterobacteria phage T7%2C this protein interacts with gp18.5 and is expressed from the -1 frame of a gene completely overlapping gene 18.5. This suggests that it may be an analog of lambda lysis protein Rz1. Other names: gp18.7.;codon_start=1;product=phage lambda Rz1-like protein;protein_id=NP_042009.1;transl_table=11;translation=length.83;
+NC_001604	GenBank	gene	37032	37283	.	+	1	ID=T7p56.gene;Alias=T7p56;Dbxref=GeneID:1261057;Name=T7p56;Note=gene 18.7;
+NC_001604	GenBank	CDS	37370	39130	.	+	1	ID=T7p57;Dbxref=GOA:P03694,UniProtKB/Swiss-Prot:P03694,GeneID:1261062;Name=T7p57;Note=gene 19;codon_start=1;product=DNA maturation protein;protein_id=NP_042010.1;transl_table=11;translation=length.586;
+NC_001604	GenBank	gene	37370	39130	.	+	1	ID=T7p57.gene;Alias=T7p57;Dbxref=GeneID:1261062;Name=T7p57;Note=gene 19;
+NC_001604	GenBank	CDS	38016	38273	.	+	1	ID=T7p58;Dbxref=UniProtKB/Swiss-Prot:P03789,GeneID:1261064;Name=T7p58;Note=gene 19.2;codon_start=1;product=hypothetical protein;protein_id=NP_042011.1;transl_table=11;translation=length.85;
+NC_001604	GenBank	gene	38016	38273	.	+	1	ID=T7p58.gene;Alias=T7p58;Dbxref=GeneID:1261064;Name=T7p58;Note=gene 19.2;
+NC_001604	GenBank	CDS	38553	38726	.	+	1	ID=T7p59;Dbxref=UniProtKB/Swiss-Prot:P03790,GeneID:1261066;Name=T7p59;Note=gene 19.3;codon_start=1;product=hypothetical protein;protein_id=NP_042012.1;transl_table=11;translation=length.57;
+NC_001604	GenBank	regulatory	39229	39229	.	+	1	ID=GenBank:regulatory:NC_001604:39229:39229;Note=T7 promoter phiOR;regulatory_class=promoter;
+NC_001604	GenBank	gene	38553	38726	.	+	1	ID=T7p59.gene;Alias=T7p59;Dbxref=GeneID:1261066;Name=T7p59;Note=gene 19.3;
+NC_001604	GenBank	CDS	39389	39538	.	+	1	ID=T7p60;Dbxref=UniProtKB/Swiss-Prot:P03804,GeneID:1261068;Name=T7p60;Note=gene 19.5;codon_start=1;product=hypothetical protein;protein_id=NP_042013.1;transl_table=11;translation=length.49;
+NC_001604	GenBank	gene	39389	39538	.	+	1	ID=T7p60.gene;Alias=T7p60;Dbxref=GeneID:1261068;Name=T7p60;Note=gene 19.5;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_gff_rebase/test-data/T7_TMHMM.gff3	Fri Jun 17 04:00:49 2022 +0000
@@ -0,0 +1,39 @@
+##gff-version 3
+T7p04	feature	Chain	2	47	.	+	.	Description=Transmembrane protein;ID=tmhmm_tmd_1-0e35f4c8-7d2d-457e-bf36-a121fcede87d;Note=Transmembrane protein - N out C in;Target=T7p04
+T7p04	TMHMM	Topological domain	1	22	.	+	.	Note=Extracellular;Parent=tmhmm_tmd_1-0e35f4c8-7d2d-457e-bf36-a121fcede87d
+T7p04	TMHMM	Transmembrane	23	45	.	+	.	Note=Helical;Parent=tmhmm_tmd_1-0e35f4c8-7d2d-457e-bf36-a121fcede87d
+T7p04	TMHMM	Topological domain	46	47	.	+	.	Note=Cytoplasmic;Parent=tmhmm_tmd_1-0e35f4c8-7d2d-457e-bf36-a121fcede87d
+##gff-version 3
+T7p11	feature	Chain	2	51	.	+	.	Description=Transmembrane protein;ID=tmhmm_tmd_2-f3395d64-94df-47d0-b5cd-b098a8fc57fc;Note=Transmembrane protein - N in C in;Target=T7p11
+T7p11	TMHMM	Topological domain	1	4	.	+	.	Note=Cytoplasmic;Parent=tmhmm_tmd_2-f3395d64-94df-47d0-b5cd-b098a8fc57fc
+T7p11	TMHMM	Transmembrane	5	24	.	+	.	Note=Helical;Parent=tmhmm_tmd_2-f3395d64-94df-47d0-b5cd-b098a8fc57fc
+T7p11	TMHMM	Topological domain	25	27	.	+	.	Note=Extracellular;Parent=tmhmm_tmd_2-f3395d64-94df-47d0-b5cd-b098a8fc57fc
+T7p11	TMHMM	Transmembrane	28	50	.	+	.	Note=Helical;Parent=tmhmm_tmd_2-f3395d64-94df-47d0-b5cd-b098a8fc57fc
+T7p11	TMHMM	Topological domain	51	51	.	+	.	Note=Cytoplasmic;Parent=tmhmm_tmd_2-f3395d64-94df-47d0-b5cd-b098a8fc57fc
+##gff-version 3
+T7p25	feature	Chain	2	112	.	+	.	Description=Transmembrane protein;ID=tmhmm_tmd_3-bc20f884-acf4-4cc9-a845-838c08833ba9;Note=Transmembrane protein - N in C in;Target=T7p25
+T7p25	TMHMM	Topological domain	1	6	.	+	.	Note=Cytoplasmic;Parent=tmhmm_tmd_3-bc20f884-acf4-4cc9-a845-838c08833ba9
+T7p25	TMHMM	Transmembrane	7	29	.	+	.	Note=Helical;Parent=tmhmm_tmd_3-bc20f884-acf4-4cc9-a845-838c08833ba9
+T7p25	TMHMM	Topological domain	30	33	.	+	.	Note=Extracellular;Parent=tmhmm_tmd_3-bc20f884-acf4-4cc9-a845-838c08833ba9
+T7p25	TMHMM	Transmembrane	34	56	.	+	.	Note=Helical;Parent=tmhmm_tmd_3-bc20f884-acf4-4cc9-a845-838c08833ba9
+T7p25	TMHMM	Topological domain	57	112	.	+	.	Note=Cytoplasmic;Parent=tmhmm_tmd_3-bc20f884-acf4-4cc9-a845-838c08833ba9
+##gff-version 3
+T7p36	feature	Chain	2	37	.	+	.	Description=Transmembrane protein;ID=tmhmm_tmd_4-4bb9c895-cf89-4510-b338-3e8027abfee8;Note=Transmembrane protein - N out C in;Target=T7p36
+T7p36	TMHMM	Topological domain	1	4	.	+	.	Note=Extracellular;Parent=tmhmm_tmd_4-4bb9c895-cf89-4510-b338-3e8027abfee8
+T7p36	TMHMM	Transmembrane	5	24	.	+	.	Note=Helical;Parent=tmhmm_tmd_4-4bb9c895-cf89-4510-b338-3e8027abfee8
+T7p36	TMHMM	Topological domain	25	37	.	+	.	Note=Cytoplasmic;Parent=tmhmm_tmd_4-4bb9c895-cf89-4510-b338-3e8027abfee8
+##gff-version 3
+T7p53	feature	Chain	2	67	.	+	.	Description=Transmembrane protein;ID=tmhmm_tmd_5-481eeec9-f604-43c0-a99b-3891bd8881bb;Note=Transmembrane protein - N out C in;Target=T7p53
+T7p53	TMHMM	Topological domain	1	36	.	+	.	Note=Extracellular;Parent=tmhmm_tmd_5-481eeec9-f604-43c0-a99b-3891bd8881bb
+T7p53	TMHMM	Transmembrane	37	55	.	+	.	Note=Helical;Parent=tmhmm_tmd_5-481eeec9-f604-43c0-a99b-3891bd8881bb
+T7p53	TMHMM	Topological domain	56	67	.	+	.	Note=Cytoplasmic;Parent=tmhmm_tmd_5-481eeec9-f604-43c0-a99b-3891bd8881bb
+##gff-version 3
+T7p56	feature	Chain	2	83	.	+	.	Description=Transmembrane protein;ID=tmhmm_tmd_6-32be98b3-e5bc-4ed2-b5ff-d031936e4a6e;Note=Transmembrane protein - N in C out;Target=T7p56
+T7p56	TMHMM	Topological domain	1	27	.	+	.	Note=Cytoplasmic;Parent=tmhmm_tmd_6-32be98b3-e5bc-4ed2-b5ff-d031936e4a6e
+T7p56	TMHMM	Transmembrane	28	50	.	+	.	Note=Helical;Parent=tmhmm_tmd_6-32be98b3-e5bc-4ed2-b5ff-d031936e4a6e
+T7p56	TMHMM	Topological domain	51	83	.	+	.	Note=Extracellular;Parent=tmhmm_tmd_6-32be98b3-e5bc-4ed2-b5ff-d031936e4a6e
+##gff-version 3
+T7p60	feature	Chain	2	49	.	+	.	Description=Transmembrane protein;ID=tmhmm_tmd_7-d18b863e-5930-430b-9a60-666e3a790599;Note=Transmembrane protein - N in C out;Target=T7p60
+T7p60	TMHMM	Topological domain	1	12	.	+	.	Note=Cytoplasmic;Parent=tmhmm_tmd_7-d18b863e-5930-430b-9a60-666e3a790599
+T7p60	TMHMM	Transmembrane	13	30	.	+	.	Note=Helical;Parent=tmhmm_tmd_7-d18b863e-5930-430b-9a60-666e3a790599
+T7p60	TMHMM	Topological domain	31	49	.	+	.	Note=Extracellular;Parent=tmhmm_tmd_7-d18b863e-5930-430b-9a60-666e3a790599
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_gff_rebase/test-data/T7_TMHMM_REBASE.gff3	Fri Jun 17 04:00:49 2022 +0000
@@ -0,0 +1,33 @@
+##gff-version 3
+NC_001604	feature	Chain	1499	1636	.	+	.	Description=Transmembrane protein;ID=tmhmm_tmd_1-0e35f4c8-7d2d-457e-bf36-a121fcede87d;Note=Transmembrane protein - N out C in;Target=T7p04;
+NC_001604	TMHMM	Topological domain	1496	1561	.	+	.	Note=Extracellular;Parent=tmhmm_tmd_1-0e35f4c8-7d2d-457e-bf36-a121fcede87d;
+NC_001604	TMHMM	Transmembrane	1562	1630	.	+	.	Note=Helical;Parent=tmhmm_tmd_1-0e35f4c8-7d2d-457e-bf36-a121fcede87d;
+NC_001604	TMHMM	Topological domain	1631	1636	.	+	.	Note=Cytoplasmic;Parent=tmhmm_tmd_1-0e35f4c8-7d2d-457e-bf36-a121fcede87d;
+NC_001604	feature	Chain	7611	7760	.	+	.	Description=Transmembrane protein;ID=tmhmm_tmd_2-f3395d64-94df-47d0-b5cd-b098a8fc57fc;Note=Transmembrane protein - N in C in;Target=T7p11;
+NC_001604	TMHMM	Topological domain	7608	7619	.	+	.	Note=Cytoplasmic;Parent=tmhmm_tmd_2-f3395d64-94df-47d0-b5cd-b098a8fc57fc;
+NC_001604	TMHMM	Transmembrane	7620	7679	.	+	.	Note=Helical;Parent=tmhmm_tmd_2-f3395d64-94df-47d0-b5cd-b098a8fc57fc;
+NC_001604	TMHMM	Topological domain	7680	7688	.	+	.	Note=Extracellular;Parent=tmhmm_tmd_2-f3395d64-94df-47d0-b5cd-b098a8fc57fc;
+NC_001604	TMHMM	Transmembrane	7689	7757	.	+	.	Note=Helical;Parent=tmhmm_tmd_2-f3395d64-94df-47d0-b5cd-b098a8fc57fc;
+NC_001604	TMHMM	Topological domain	7758	7760	.	+	.	Note=Cytoplasmic;Parent=tmhmm_tmd_2-f3395d64-94df-47d0-b5cd-b098a8fc57fc;
+NC_001604	feature	Chain	12991	13323	.	+	.	Description=Transmembrane protein;ID=tmhmm_tmd_3-bc20f884-acf4-4cc9-a845-838c08833ba9;Note=Transmembrane protein - N in C in;Target=T7p25;
+NC_001604	TMHMM	Topological domain	12988	13005	.	+	.	Note=Cytoplasmic;Parent=tmhmm_tmd_3-bc20f884-acf4-4cc9-a845-838c08833ba9;
+NC_001604	TMHMM	Transmembrane	13006	13074	.	+	.	Note=Helical;Parent=tmhmm_tmd_3-bc20f884-acf4-4cc9-a845-838c08833ba9;
+NC_001604	TMHMM	Topological domain	13075	13086	.	+	.	Note=Extracellular;Parent=tmhmm_tmd_3-bc20f884-acf4-4cc9-a845-838c08833ba9;
+NC_001604	TMHMM	Transmembrane	13087	13155	.	+	.	Note=Helical;Parent=tmhmm_tmd_3-bc20f884-acf4-4cc9-a845-838c08833ba9;
+NC_001604	TMHMM	Topological domain	13156	13323	.	+	.	Note=Cytoplasmic;Parent=tmhmm_tmd_3-bc20f884-acf4-4cc9-a845-838c08833ba9;
+NC_001604	feature	Chain	18397	18504	.	+	.	Description=Transmembrane protein;ID=tmhmm_tmd_4-4bb9c895-cf89-4510-b338-3e8027abfee8;Note=Transmembrane protein - N out C in;Target=T7p36;
+NC_001604	TMHMM	Topological domain	18394	18405	.	+	.	Note=Extracellular;Parent=tmhmm_tmd_4-4bb9c895-cf89-4510-b338-3e8027abfee8;
+NC_001604	TMHMM	Transmembrane	18406	18465	.	+	.	Note=Helical;Parent=tmhmm_tmd_4-4bb9c895-cf89-4510-b338-3e8027abfee8;
+NC_001604	TMHMM	Topological domain	18466	18504	.	+	.	Note=Cytoplasmic;Parent=tmhmm_tmd_4-4bb9c895-cf89-4510-b338-3e8027abfee8;
+NC_001604	feature	Chain	36347	36544	.	+	.	Description=Transmembrane protein;ID=tmhmm_tmd_5-481eeec9-f604-43c0-a99b-3891bd8881bb;Note=Transmembrane protein - N out C in;Target=T7p53;
+NC_001604	TMHMM	Topological domain	36344	36451	.	+	.	Note=Extracellular;Parent=tmhmm_tmd_5-481eeec9-f604-43c0-a99b-3891bd8881bb;
+NC_001604	TMHMM	Transmembrane	36452	36508	.	+	.	Note=Helical;Parent=tmhmm_tmd_5-481eeec9-f604-43c0-a99b-3891bd8881bb;
+NC_001604	TMHMM	Topological domain	36509	36544	.	+	.	Note=Cytoplasmic;Parent=tmhmm_tmd_5-481eeec9-f604-43c0-a99b-3891bd8881bb;
+NC_001604	feature	Chain	37035	37280	.	+	.	Description=Transmembrane protein;ID=tmhmm_tmd_6-32be98b3-e5bc-4ed2-b5ff-d031936e4a6e;Note=Transmembrane protein - N in C out;Target=T7p56;
+NC_001604	TMHMM	Topological domain	37032	37112	.	+	.	Note=Cytoplasmic;Parent=tmhmm_tmd_6-32be98b3-e5bc-4ed2-b5ff-d031936e4a6e;
+NC_001604	TMHMM	Transmembrane	37113	37181	.	+	.	Note=Helical;Parent=tmhmm_tmd_6-32be98b3-e5bc-4ed2-b5ff-d031936e4a6e;
+NC_001604	TMHMM	Topological domain	37182	37280	.	+	.	Note=Extracellular;Parent=tmhmm_tmd_6-32be98b3-e5bc-4ed2-b5ff-d031936e4a6e;
+NC_001604	feature	Chain	39392	39535	.	+	.	Description=Transmembrane protein;ID=tmhmm_tmd_7-d18b863e-5930-430b-9a60-666e3a790599;Note=Transmembrane protein - N in C out;Target=T7p60;
+NC_001604	TMHMM	Topological domain	39389	39424	.	+	.	Note=Cytoplasmic;Parent=tmhmm_tmd_7-d18b863e-5930-430b-9a60-666e3a790599;
+NC_001604	TMHMM	Transmembrane	39425	39478	.	+	.	Note=Helical;Parent=tmhmm_tmd_7-d18b863e-5930-430b-9a60-666e3a790599;
+NC_001604	TMHMM	Topological domain	39479	39535	.	+	.	Note=Extracellular;Parent=tmhmm_tmd_7-d18b863e-5930-430b-9a60-666e3a790599;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_gff_rebase/test-data/child.gff	Fri Jun 17 04:00:49 2022 +0000
@@ -0,0 +1,2 @@
+#gff-version 3
+cds42	blastp	match_part	1	50	1e-40	.	.	ID=m00001;Notes=RNAse A Protein
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_gff_rebase/test-data/nonprotein.gff	Fri Jun 17 04:00:49 2022 +0000
@@ -0,0 +1,2 @@
+##gff-version 3
+PhageBob	blastp	match_part	300	349	1e-40	+	.	ID=m00001;Notes=RNAse A Protein;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_gff_rebase/test-data/parent.gff	Fri Jun 17 04:00:49 2022 +0000
@@ -0,0 +1,3 @@
+#gff-version 3
+PhageBob	maker	cds	300	500	.	+	.	ID=gene42
+PhageBob	maker	cds	300	500	.	+	.	Parent=gene42;ID=cds42
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_gff_rebase/test-data/proteins.gff	Fri Jun 17 04:00:49 2022 +0000
@@ -0,0 +1,2 @@
+##gff-version 3
+PhageBob	blastp	match_part	300	449	1e-40	+	.	ID=m00001;Notes=RNAse A Protein;