changeset 0:c3140b08d703 draft default tip

Uploaded
author cpt
date Fri, 17 Jun 2022 13:00:50 +0000 (2022-06-17)
parents
children
files cpt_phageqc_annotation/cpt-macros.xml cpt_phageqc_annotation/cpt.py cpt_phageqc_annotation/gff3.py cpt_phageqc_annotation/macros.xml cpt_phageqc_annotation/phage_annotation_validator.py cpt_phageqc_annotation/phage_annotation_validator.xml cpt_phageqc_annotation/phageqc_report_464.html cpt_phageqc_annotation/phageqc_report_annotation_table.html cpt_phageqc_annotation/phageqc_report_full.html cpt_phageqc_annotation/phageqc_report_genomea.tex cpt_phageqc_annotation/shinefind.py cpt_phageqc_annotation/test-data/AY216660.fasta cpt_phageqc_annotation/test-data/AY216660.gff3 cpt_phageqc_annotation/test-data/PhageQC_Out.gff3 cpt_phageqc_annotation/test-data/PhageQC_Out.html
diffstat 15 files changed, 6369 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_phageqc_annotation/cpt-macros.xml	Fri Jun 17 13:00:50 2022 +0000
@@ -0,0 +1,115 @@
+<?xml version="1.0"?>
+<macros>
+	<!-- Shared requirements macro: pins python 2.7, biopython 1.65 and requests 2.12.1, lets the caller append further requirements through the yield, and adds a version_command that prints the git HEAD of the tool directory. -->
+	<xml name="gff_requirements">
+		<requirements>
+			<requirement type="package" version="2.7">python</requirement>
+			<requirement type="package" version="1.65">biopython</requirement>
+			<requirement type="package" version="2.12.1">requests</requirement>
+			<yield/>
+		</requirements>
+		<version_command>
+		<![CDATA[
+			cd $__tool_directory__ && git rev-parse HEAD
+		]]>
+		</version_command>
+	</xml>
+	<!-- Two bare citation elements (one DOI, one BibTeX entry for Mijalis and Rasche). Unlike the "citations*" macros in this file there is no enclosing citations element and no yield here. -->
+	<xml name="citation/mijalisrasche">
+		<citation type="doi">10.1371/journal.pcbi.1008214</citation>
+		<citation type="bibtex">@unpublished{galaxyTools,
+		author = {E. Mijalis, H. Rasche},
+		title = {CPT Galaxy Tools},
+		year = {2013-2017},
+		note = {https://github.com/tamu-cpt/galaxy-tools/}
+		}
+		</citation>
+	</xml>
+	<!-- Complete citations element: the DOI plus the Mijalis/Rasche BibTeX entry; the yield lets a caller add further citation elements. -->
+	<xml name="citations">
+		<citations>
+			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
+			<citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {E. Mijalis, H. Rasche},
+				title = {CPT Galaxy Tools},
+				year = {2013-2017},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation> 
+		<yield/>
+		</citations>
+	</xml>
+    	<!-- Complete citations element: the DOI plus a BibTeX entry crediting C. Ross (2020-); the yield lets a caller add further citation elements. -->
+    	<xml name="citations-crr">
+		<citations>
+			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
+			<citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Ross},
+				title = {CPT Galaxy Tools},
+				year = {2020-},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+		<yield/>
+		</citations>
+	</xml>
+        <!-- Complete citations element: the DOI plus two BibTeX entries (Mijalis/Rasche 2013-2017 and A. Criscione 2019-2021); the yield lets a caller add more. NOTE(review): both BibTeX entries use the same key "galaxyTools"; confirm that consumers tolerate the duplicate key. -->
+        <xml name="citations-2020">
+		<citations>
+			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
+			<citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {E. Mijalis, H. Rasche},
+				title = {CPT Galaxy Tools},
+				year = {2013-2017},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+                        <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {A. Criscione},
+				title = {CPT Galaxy Tools},
+				year = {2019-2021},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+                        </citation>
+                        <yield/>
+		</citations>
+	</xml>
+        <!-- Complete citations element: the DOI plus a single BibTeX entry crediting A. Criscione (2019-2021); the yield lets a caller add further citation elements. -->
+        <xml name="citations-2020-AJC-solo">
+		<citations>
+			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
+                        <citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {A. Criscione},
+				title = {CPT Galaxy Tools},
+				year = {2019-2021},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+                        </citation>
+                        <yield/>
+		</citations>
+	</xml>
+        <!-- Complete citations element: the DOI plus a BibTeX entry crediting C. Maughmer (2017-2020); the yield lets a caller add further citation elements. -->
+        <xml name="citations-clm">
+		<citations>
+			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
+			<citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Maughmer},
+				title = {CPT Galaxy Tools},
+				year = {2017-2020},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+                        <yield/>
+		</citations>
+	</xml>
+        <!-- Single bare BibTeX citation for C. Maughmer followed by a yield. There is no enclosing citations element and no DOI entry in this macro. -->
+        <xml name="sl-citations-clm">
+			<citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Maughmer},
+				title = {CPT Galaxy Tools},
+				year = {2017-2020},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+                        <yield/>
+	</xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_phageqc_annotation/cpt.py	Fri Jun 17 13:00:50 2022 +0000
@@ -0,0 +1,342 @@
+#!/usr/bin/env python
+import regex as re
+from Bio.Seq import Seq, reverse_complement, translate
+from Bio.SeqRecord import SeqRecord
+from Bio import SeqIO
+from Bio.Data import CodonTable
+import logging
+
+logging.basicConfig()
+log = logging.getLogger()
+
+# Name patterns used by phage_name_parser. They are written as raw strings so
+# that "\s" reaches the regex engine as-is instead of being an invalid Python
+# string escape.
+#
+# "<host> phage <name>": the greedy host group keeps any whitespace that
+# precedes the word "phage".
+PHAGE_IN_MIDDLE = re.compile(r"^(?P<host>.*)\s*phage (?P<phage>.*)$")
+# "<host> bacteriophage <name>", same shape as above.
+BACTERIOPHAGE_IN_MIDDLE = re.compile(r"^(?P<host>.*)\s*bacteriophage (?P<phage>.*)$")
+# Names that begin with "phage"/"Phage", optionally glued to a
+# bacterio/vibrio prefix (e.g. "Vibriophage X"); no host is captured.
+STARTS_WITH_PHAGE = re.compile(
+    r"^(bacterio|vibrio|Bacterio|Vibrio|)?[Pp]hage (?P<phage>.*)$"
+)
+# Names of the form "v<Upper>_<Upper><lower><lower>_...", e.g. "vB_Eco_...".
+NEW_STYLE_NAMES = re.compile(r"(?P<phage>v[A-Z]_[A-Z][a-z]{2}_.*)")
+
+
+def phage_name_parser(name):
+    """Split a record description into ``(host, phage)``.
+
+    A trailing ", complete genome" (with or without the final period) is
+    removed first. The module-level patterns are then tried in a fixed order
+    and the first one that matches decides the result.
+
+    :param name: description string to parse
+    :return: ``(host, phage)``; ``host`` is None for patterns that do not
+             capture a host, and both are None when nothing matches
+    """
+    for suffix in (", complete genome.", ", complete genome"):
+        name = name.replace(suffix, "")
+
+    # Patterns carrying both a host and a phage group, most specific first.
+    for pattern in (BACTERIOPHAGE_IN_MIDDLE, PHAGE_IN_MIDDLE):
+        hit = pattern.match(name)
+        if hit:
+            return (hit.group("host"), hit.group("phage"))
+
+    # Patterns that only identify the phage name.
+    for pattern in (STARTS_WITH_PHAGE, NEW_STYLE_NAMES):
+        hit = pattern.match(name)
+        if hit:
+            return (None, hit.group("phage"))
+
+    return (None, None)
+
+
+class OrfFinder(object):
+    """Find open reading frames in FASTA nucleotide records.
+
+    Start and stop codons come from the Biopython ambiguous generic codon
+    table selected by ``table``. :meth:`locate` writes every hit as FASTA
+    (nucleotide and protein), BED and GFF3.
+    """
+
+    def __init__(self, table, ftype, ends, min_len, strand):
+        """Store the search settings and pre-compile the codon regexes.
+
+        :param table: key into ``CodonTable.ambiguous_generic_by_id``
+        :param ftype: label used when building feature IDs and log messages
+        :param ends: stored on the instance; not read by any method of this class
+        :param min_len: minimum protein length for a hit to be reported
+        :param strand: "forward" or "reverse" restricts :meth:`get_all_peptides`
+                       to that strand; any other value searches both
+        """
+        self.table = table
+        self.table_obj = CodonTable.ambiguous_generic_by_id[table]
+        self.ends = ends
+        self.ftype = ftype
+        self.min_len = min_len
+        self.starts = sorted(self.table_obj.start_codons)
+        self.stops = sorted(self.table_obj.stop_codons)
+        # One alternation per codon set, e.g. "ATG|GTG|TTG".
+        self.re_starts = re.compile("|".join(self.starts))
+        self.re_stops = re.compile("|".join(self.stops))
+        self.strand = strand
+
+    def locate(self, fasta_file, out_nuc, out_prot, out_bed, out_gff3):
+        """Scan every FASTA record and write each ORF to the four outputs.
+
+        :param fasta_file: handed to ``SeqIO.parse`` with format "fasta"
+        :param out_nuc: handle receiving the nucleotide sequences as FASTA
+        :param out_prot: handle receiving the translations as FASTA
+        :param out_bed: handle receiving one tab-separated BED line per ORF
+        :param out_gff3: handle receiving a GFF3 header and one CDS line per ORF
+        """
+        seq_format = "fasta"
+        log.debug("Genetic code table %i" % self.table)
+        log.debug("Minimum length %i aa" % self.min_len)
+
+        # Running total over all records (the per-record counter is ``i``).
+        out_count = 0
+
+        out_gff3.write("##gff-version 3\n")
+
+        for idx, record in enumerate(SeqIO.parse(fasta_file, seq_format)):
+            for i, (f_start, f_end, f_strand, n, t) in enumerate(
+                self.get_all_peptides(str(record.seq).upper())
+            ):
+                out_count += 1
+
+                descr = "length %i aa, %i bp, from %s..%s[%s] of %s" % (
+                    len(t),
+                    len(n),
+                    f_start,
+                    f_end,
+                    f_strand,
+                    record.description,
+                )
+                fid = record.id + "|%s%i" % (self.ftype, i + 1)
+
+                r = SeqRecord(Seq(n), id=fid, name="", description=descr)
+                # ``t`` is rebound from the protein string to its SeqRecord.
+                t = SeqRecord(Seq(t), id=fid, name="", description=descr)
+
+                SeqIO.write(r, out_nuc, "fasta")
+                SeqIO.write(t, out_prot, "fasta")
+
+                nice_strand = "+" if f_strand == +1 else "-"
+
+                # BED keeps the zero-based start as returned by get_all_peptides.
+                out_bed.write(
+                    "\t".join(
+                        map(str, [record.id, f_start, f_end, fid, 0, nice_strand])
+                    )
+                    + "\n"
+                )
+
+                # GFF3 line: the start is shifted by one relative to the BED line.
+                out_gff3.write(
+                    "\t".join(
+                        map(
+                            str,
+                            [
+                                record.id,
+                                "getOrfsOrCds",
+                                "CDS",
+                                f_start + 1,
+                                f_end,
+                                ".",
+                                nice_strand,
+                                0,
+                                "ID=%s.%s.%s" % (self.ftype, idx, i + 1),
+                            ],
+                        )
+                    )
+                    + "\n"
+                )
+        log.info("Found %i %ss", out_count, self.ftype)
+
+    def start_chop_and_trans(self, s, strict=True):
+        """Returns offset, trimmed nuc, protein.
+
+        Generator: yields one ``(offset, nuc, protein)`` tuple for every
+        in-frame start codon found in ``s``. With ``strict`` the input must
+        end in one of ``self.stops`` (checked with ``assert``).
+        """
+        if strict:
+            assert s[-3:] in self.stops, s
+        assert len(s) % 3 == 0
+        # ``overlapped=True`` is an option of the third-party ``regex`` module
+        # that this file imports under the name ``re``.
+        for match in self.re_starts.finditer(s, overlapped=True):
+            # Must check the start is in frame
+            start = match.start()
+            if start % 3 == 0:
+                n = s[start:]
+                assert len(n) % 3 == 0, "%s is len %i" % (n, len(n))
+                if strict:
+                    t = translate(n, self.table)
+                else:
+                    # Use when missing stop codon,
+                    # the first residue is forced to "M" and the rest is
+                    # translated up to the first stop.
+                    t = "M" + translate(n[3:], self.table, to_stop=True)
+                yield start, n, t  # Edited by CPT to be a generator
+
+    def break_up_frame(self, s):
+        """Returns offset, nuc, protein.
+
+        Generator: cuts ``s`` at every stop codon that is in frame with
+        position 0 and yields the start-codon candidates of each piece whose
+        protein is at least ``self.min_len`` long.
+        """
+        start = 0
+        for match in self.re_stops.finditer(s, overlapped=True):
+            # ``index`` is the position just past this stop codon.
+            index = match.start() + 3
+            if index % 3 != 0:
+                continue
+            n = s[start:index]
+            for (offset, n, t) in self.start_chop_and_trans(n):
+                if n and len(t) >= self.min_len:
+                    yield start + offset, n, t
+            # The next piece begins right after this in-frame stop.
+            start = index
+
+    def putative_genes_in_sequence(self, nuc_seq):
+        """Returns start, end, strand, nucleotides, protein.
+        Co-ordinates are Python style zero-based.
+
+        Always searches both strands (``self.strand`` is not consulted) and
+        returns a sorted list.
+        """
+        nuc_seq = nuc_seq.upper()
+        # TODO - Refactor to use a generator function (in start order)
+        # rather than making a list and sorting?
+        answer = []
+        full_len = len(nuc_seq)
+
+        for frame in range(0, 3):
+            for offset, n, t in self.break_up_frame(nuc_seq[frame:]):
+                start = frame + offset  # zero based
+                answer.append((start, start + len(n), +1, n, t))
+
+        rc = reverse_complement(nuc_seq)
+        for frame in range(0, 3):
+            for offset, n, t in self.break_up_frame(rc[frame:]):
+                start = full_len - frame - offset  # zero based
+                # NOTE(review): here the first element is the larger
+                # coordinate, whereas get_all_peptides yields
+                # (start - len(n), start) for the reverse strand. Confirm
+                # which order callers of this method expect.
+                answer.append((start, start - len(n), -1, n, t))
+        answer.sort()
+        return answer
+
+    def get_all_peptides(self, nuc_seq):
+        """Returns start, end, strand, nucleotides, protein.
+
+        Co-ordinates are Python style zero-based.
+
+        Generator: forward-strand hits are skipped when ``self.strand`` is
+        "reverse", reverse-strand hits when it is "forward".
+        """
+        # Refactored into generator by CPT
+        full_len = len(nuc_seq)
+        if self.strand != "reverse":
+            for frame in range(0, 3):
+                for offset, n, t in self.break_up_frame(nuc_seq[frame:]):
+                    start = frame + offset  # zero based
+                    yield (start, start + len(n), +1, n, t)
+        if self.strand != "forward":
+            rc = reverse_complement(nuc_seq)
+            for frame in range(0, 3):
+                for offset, n, t in self.break_up_frame(rc[frame:]):
+                    # Map the offset on the reverse complement back onto
+                    # forward-strand coordinates.
+                    start = full_len - frame - offset  # zero based
+                    yield (start - len(n), start, -1, n, t)
+
+
+class MGAFinder(object):
+    """Find open reading frames on both strands of FASTA nucleotide records.
+
+    Start and stop codons come from the Biopython ambiguous generic codon
+    table selected by ``table``. The methods mirror those of OrfFinder in
+    this module, except that there is no strand setting.
+    """
+
+    def __init__(self, table, ftype, ends, min_len):
+        """Store the search settings and pre-compile the codon regexes.
+
+        :param table: key into ``CodonTable.ambiguous_generic_by_id``
+        :param ftype: label used when building feature IDs and log messages
+        :param ends: stored on the instance; not read by any method of this class
+        :param min_len: minimum protein length for a hit to be reported
+        """
+        self.table = table
+        self.table_obj = CodonTable.ambiguous_generic_by_id[table]
+        self.ends = ends
+        self.ftype = ftype
+        self.min_len = min_len
+        self.starts = sorted(self.table_obj.start_codons)
+        self.stops = sorted(self.table_obj.stop_codons)
+        # One alternation per codon set, e.g. "ATG|GTG|TTG".
+        self.re_starts = re.compile("|".join(self.starts))
+        self.re_stops = re.compile("|".join(self.stops))
+
+    def locate(self, fasta_file, out_nuc, out_prot, out_bed, out_gff3):
+        """Scan every FASTA record and write each ORF to the four outputs.
+
+        :param fasta_file: handed to ``SeqIO.parse`` with format "fasta"
+        :param out_nuc: handle receiving the nucleotide sequences as FASTA
+        :param out_prot: handle receiving the translations as FASTA
+        :param out_bed: handle receiving one tab-separated BED line per ORF
+        :param out_gff3: handle receiving a GFF3 header and one CDS line per ORF
+        """
+        seq_format = "fasta"
+        log.debug("Genetic code table %i" % self.table)
+        log.debug("Minimum length %i aa" % self.min_len)
+
+        # Running total over all records (the per-record counter is ``i``).
+        out_count = 0
+
+        out_gff3.write("##gff-version 3\n")
+
+        for idx, record in enumerate(SeqIO.parse(fasta_file, seq_format)):
+            for i, (f_start, f_end, f_strand, n, t) in enumerate(
+                self.get_all_peptides(str(record.seq).upper())
+            ):
+                out_count += 1
+
+                descr = "length %i aa, %i bp, from %s..%s[%s] of %s" % (
+                    len(t),
+                    len(n),
+                    f_start,
+                    f_end,
+                    f_strand,
+                    record.description,
+                )
+                fid = record.id + "|%s%i" % (self.ftype, i + 1)
+
+                r = SeqRecord(Seq(n), id=fid, name="", description=descr)
+                # ``t`` is rebound from the protein string to its SeqRecord.
+                t = SeqRecord(Seq(t), id=fid, name="", description=descr)
+
+                SeqIO.write(r, out_nuc, "fasta")
+                SeqIO.write(t, out_prot, "fasta")
+
+                nice_strand = "+" if f_strand == +1 else "-"
+
+                # BED keeps the zero-based start as returned by get_all_peptides.
+                out_bed.write(
+                    "\t".join(
+                        map(str, [record.id, f_start, f_end, fid, 0, nice_strand])
+                    )
+                    + "\n"
+                )
+
+                # GFF3 line: the start is shifted by one relative to the BED line.
+                out_gff3.write(
+                    "\t".join(
+                        map(
+                            str,
+                            [
+                                record.id,
+                                "getOrfsOrCds",
+                                "CDS",
+                                f_start + 1,
+                                f_end,
+                                ".",
+                                nice_strand,
+                                0,
+                                "ID=%s.%s.%s" % (self.ftype, idx, i + 1),
+                            ],
+                        )
+                    )
+                    + "\n"
+                )
+        log.info("Found %i %ss", out_count, self.ftype)
+
+    def start_chop_and_trans(self, s, strict=True):
+        """Returns offset, trimmed nuc, protein.
+
+        Generator: yields one ``(offset, nuc, protein)`` tuple for every
+        in-frame start codon found in ``s``. With ``strict`` the input must
+        end in one of ``self.stops`` (checked with ``assert``).
+        """
+        if strict:
+            assert s[-3:] in self.stops, s
+        assert len(s) % 3 == 0
+        # ``overlapped=True`` is an option of the third-party ``regex`` module
+        # that this file imports under the name ``re``.
+        for match in self.re_starts.finditer(s, overlapped=True):
+            # Must check the start is in frame
+            start = match.start()
+            if start % 3 == 0:
+                n = s[start:]
+                assert len(n) % 3 == 0, "%s is len %i" % (n, len(n))
+                if strict:
+                    t = translate(n, self.table)
+                else:
+                    # Use when missing stop codon,
+                    # the first residue is forced to "M" and the rest is
+                    # translated up to the first stop.
+                    t = "M" + translate(n[3:], self.table, to_stop=True)
+                yield start, n, t
+
+    def break_up_frame(self, s):
+        """Returns offset, nuc, protein.
+
+        Generator: cuts ``s`` at every stop codon that is in frame with
+        position 0 and yields the start-codon candidates of each piece whose
+        protein is at least ``self.min_len`` long.
+        """
+        start = 0
+        for match in self.re_stops.finditer(s, overlapped=True):
+            # ``index`` is the position just past this stop codon.
+            index = match.start() + 3
+            if index % 3 != 0:
+                continue
+            n = s[start:index]
+            for (offset, n, t) in self.start_chop_and_trans(n):
+                if n and len(t) >= self.min_len:
+                    yield start + offset, n, t
+            # The next piece begins right after this in-frame stop.
+            start = index
+
+    def putative_genes_in_sequence(self, nuc_seq):
+        """Returns start, end, strand, nucleotides, protein.
+        Co-ordinates are Python style zero-based.
+
+        Searches both strands and returns a sorted list.
+        """
+        nuc_seq = nuc_seq.upper()
+        # TODO - Refactor to use a generator function (in start order)
+        # rather than making a list and sorting?
+        answer = []
+        full_len = len(nuc_seq)
+
+        for frame in range(0, 3):
+            for offset, n, t in self.break_up_frame(nuc_seq[frame:]):
+                start = frame + offset  # zero based
+                answer.append((start, start + len(n), +1, n, t))
+
+        rc = reverse_complement(nuc_seq)
+        for frame in range(0, 3):
+            for offset, n, t in self.break_up_frame(rc[frame:]):
+                start = full_len - frame - offset  # zero based
+                # NOTE(review): here the first element is the larger
+                # coordinate, whereas get_all_peptides yields
+                # (start - len(n), start) for the reverse strand. Confirm
+                # which order callers of this method expect.
+                answer.append((start, start - len(n), -1, n, t))
+        answer.sort()
+        return answer
+
+    def get_all_peptides(self, nuc_seq):
+        """Returns start, end, strand, nucleotides, protein.
+
+        Co-ordinates are Python style zero-based.
+
+        Generator: yields all forward-strand hits first, then all
+        reverse-strand hits.
+        """
+        # Refactored into generator by CPT
+
+        full_len = len(nuc_seq)
+        for frame in range(0, 3):
+            for offset, n, t in self.break_up_frame(nuc_seq[frame:]):
+                start = frame + offset  # zero based
+                yield (start, start + len(n), +1, n, t)
+        rc = reverse_complement(nuc_seq)
+        for frame in range(0, 3):
+            for offset, n, t in self.break_up_frame(rc[frame:]):
+                # Map the offset on the reverse complement back onto
+                # forward-strand coordinates.
+                start = full_len - frame - offset  # zero based
+                yield (start - len(n), start, -1, n, t)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_phageqc_annotation/gff3.py	Fri Jun 17 13:00:50 2022 +0000
@@ -0,0 +1,346 @@
+import copy
+import logging
+
+log = logging.getLogger()
+log.setLevel(logging.WARN)
+
+
+def feature_lambda(
+    feature_list,
+    test,
+    test_kwargs,
+    subfeatures=True,
+    parent=None,
+    invert=False,
+    recurse=True,
+):
+    """Recursively search through features, testing each with a test function, yielding matches.
+
+    GFF3 is a hierarchical data structure, so we need to be able to recursively
+    search through features. E.g. if you're looking for a feature with
+    ID='bob.42', you can't just do a simple list comprehension with a test
+    case. You don't know how deeply buried bob.42 will be in the feature tree.
+    This is where feature_lambda steps in.
+
+    As a side effect every visited feature gets a ``_parent`` attribute: the
+    feature it was reached from, or itself when it is a top-level feature.
+
+    :type feature_list: list
+    :param feature_list: an iterable of features
+
+    :type test: function reference
+    :param test: a closure with the method signature (feature, **kwargs) where
+                 the kwargs are those passed in the next argument. This
+                 function should return True or False, True if the feature is
+                 to be yielded as part of the main feature_lambda function, or
+                 False if it is to be ignored. Any other return value is
+                 interpreted by its truthiness. This function CAN mutate the
+                 features passed to it (think "apply").
+
+    :type test_kwargs: dictionary
+    :param test_kwargs: kwargs to pass to your closure when it is called.
+
+    :type subfeatures: boolean
+    :param subfeatures: when True a matched feature is yielded as-is, with its
+                        sub_feature tree attached. When False a deep copy with
+                        an empty ``sub_features`` list is yielded instead.
+                        Descendants are tested (and possibly yielded
+                        separately) in both cases as long as ``recurse`` is
+                        set.
+
+    :type parent: feature or None
+    :param parent: the feature that owns ``feature_list``; None for the top
+                   level. Used internally by the recursion.
+
+    :type invert: boolean
+    :param invert: Negate/invert the result of the filter.
+
+    :type recurse: boolean
+    :param recurse: descend into ``sub_features`` when True.
+
+    :rtype: generator
+    :return: Yields the matching features one at a time.
+    """
+    # Either the top level set of [features] or the subfeature attribute
+    for feature in feature_list:
+        # Top-level features point at themselves so we cannot go above root.
+        # Compare with None explicitly: a parent's truthiness must not decide
+        # whether its children are treated as roots.
+        feature._parent = feature if parent is None else parent
+
+        # Coerce to bool so that predicates returning None, strings, lists or
+        # match objects work; a raw "invert ^ result" raises TypeError there.
+        matched = bool(test(feature, **test_kwargs)) != bool(invert)
+        if matched:
+            if subfeatures:
+                yield feature
+            else:
+                feature_copy = copy.deepcopy(feature)
+                feature_copy.sub_features = list()
+                yield feature_copy
+
+        if recurse and hasattr(feature, "sub_features"):
+            for x in feature_lambda(
+                feature.sub_features,
+                test,
+                test_kwargs,
+                subfeatures=subfeatures,
+                parent=feature,
+                invert=invert,
+                recurse=recurse,
+            ):
+                yield x
+
+
+def fetchParent(feature):
+    """Return the top-most ancestor of ``feature``.
+
+    Follows the ``_parent`` links that feature_lambda attaches. The walk stops
+    at a feature that has no ``_parent`` attribute, whose ``_parent`` is None,
+    or that is its own ``_parent``. The last case matters because
+    feature_lambda marks top-level features by pointing them at themselves;
+    without it the walk would never terminate.
+
+    :param feature: any feature object
+    :return: the root feature (``feature`` itself when it has no parent)
+    """
+    current = feature
+    while True:
+        parent = getattr(current, "_parent", None)
+        if parent is None or parent is current:
+            return current
+        current = parent
+
+
+def feature_test_true(feature, **kwargs):
+    """Predicate for feature_lambda that accepts every feature.
+
+    Both the feature and the keyword arguments are ignored.
+    """
+    return True
+
+
+def feature_test_type(feature, **kwargs):
+    """Case-insensitive test of ``feature.type``.
+
+    :param kwargs: either ``type`` (a single type name) or ``types`` (an
+                   iterable of type names); ``type`` wins when both are given
+    :return: True when the feature's type equals the requested type, or any
+             of the requested types
+    :raises Exception: when neither ``type`` nor ``types`` is supplied
+    """
+
+    def _normalised(value):
+        return str(value).upper()
+
+    if "type" in kwargs:
+        return _normalised(feature.type) == _normalised(kwargs["type"])
+    if "types" in kwargs:
+        return any(
+            _normalised(feature.type) == _normalised(candidate)
+            for candidate in kwargs["types"]
+        )
+    raise Exception("Incorrect feature_test_type call, need type or types")
+
+
+def feature_test_qual_value(feature, **kwargs):
+    """Test qualifier values.
+
+    Returns True when at least one value stored under the requested
+    qualifier(s) in ``feature.qualifiers`` is contained in
+    ``kwargs['attribute_list']``.
+
+    :param kwargs: ``qualifier`` is a single qualifier name or a list of
+                   names; ``attribute_list`` is the collection of accepted
+                   values
+    """
+    requested = kwargs["qualifier"]
+    qualifier_names = requested if isinstance(requested, list) else [requested]
+    return any(
+        attribute_value in kwargs["attribute_list"]
+        for qualifier in qualifier_names
+        for attribute_value in feature.qualifiers.get(qualifier, [])
+    )
+
+
+def feature_test_location(feature, **kwargs):
+    """Test whether position ``loc`` lies within the feature (bounds inclusive).
+
+    :param kwargs: ``loc`` is the position to test; when ``strand`` is given
+                   the feature's strand must equal it as well
+    """
+    span = feature.location
+    if "strand" in kwargs and span.strand != kwargs["strand"]:
+        return False
+    return span.start <= kwargs["loc"] <= span.end
+
+
+def feature_test_quals(feature, **kwargs):
+    """Test that a feature carries every requested qualifier.
+
+    Each keyword names a qualifier. A value of None only requires the
+    qualifier to be present; otherwise the value is a list of strings and at
+    least one of them must occur as a substring of at least one of the
+    feature's values for that qualifier. All keywords must be satisfied.
+
+    Example::
+
+        a = Feature(qualifiers={'Note': ['Some notes', 'Aasdf']})
+
+        # Check if a contains a Note
+        feature_test_quals(a, Note=None)  # Returns True
+        feature_test_quals(a, Product=None)  # Returns False
+
+        # Check if a contains a note with specific value
+        feature_test_quals(a, Note=['ome'])  # Returns True
+
+        # Check if a contains a note with specific value
+        feature_test_quals(a, Note=['other'])  # Returns False
+
+    :return: True when every keyword is satisfied (also when no keyword is
+             given), False otherwise
+    """
+    for key, wanted in kwargs.items():
+        if key not in feature.qualifiers:
+            return False
+
+        # Key is present, no value specified: this key is satisfied, but the
+        # remaining keys still have to be checked.
+        if wanted is None:
+            continue
+
+        # Otherwise one of the wanted strings must appear inside one of the
+        # feature's qualifier values.
+        if not any(
+            x in value for value in feature.qualifiers[key] for x in wanted
+        ):
+            return False
+
+    return True
+
+
+def feature_test_contains(feature, **kwargs):
+    """Test whether a position or a range lies strictly inside the feature.
+
+    :param kwargs: ``index`` (a single position) or ``range`` (a mapping with
+                   ``start`` and ``end``); ``index`` wins when both are given
+    :return: True when the position, or both ends of the range, fall between
+             the feature's start and end with the bounds themselves excluded
+    :raises RuntimeError: when neither ``index`` nor ``range`` is supplied
+    """
+    if "index" not in kwargs and "range" not in kwargs:
+        raise RuntimeError("Must use index or range keyword")
+
+    if "index" in kwargs:
+        return feature.location.start < kwargs["index"] < feature.location.end
+
+    start_inside = (
+        feature.location.start < kwargs["range"]["start"] < feature.location.end
+    )
+    return (
+        start_inside
+        and feature.location.start < kwargs["range"]["end"] < feature.location.end
+    )
+
+
+def get_id(feature=None, parent_prefix=None):
+    """Build a display identifier for ``feature`` from its qualifiers.
+
+    The first value of the first qualifier present, in the order locus_tag,
+    gene, Gene, product, Product, Name, is used and prefixed with
+    ``parent_prefix + "|"`` when a prefix is given. When none of those
+    qualifiers exist, ``feature.id`` is returned on its own, without the
+    prefix.
+    """
+    prefix = "" if parent_prefix is None else parent_prefix + "|"
+
+    preferred_keys = ("locus_tag", "gene", "Gene", "product", "Product", "Name")
+    for key in preferred_keys:
+        if key in feature.qualifiers:
+            return prefix + feature.qualifiers[key][0]
+
+    # No usable qualifier: fall back to the bare feature id.
+    return feature.id
+
+
+def get_gff3_id(gene):
+    """Return the first ``Name`` qualifier of ``gene``, or ``gene.id`` when it has none."""
+    names = gene.qualifiers.get("Name", [gene.id])
+    return names[0]
+
+
+def ensure_location_in_bounds(start=0, end=0, parent_length=0):
+    """Shift ``start`` and ``end`` into ``[0, parent_length]`` in steps of three.
+
+    Moving by whole codons, rather than clamping to the boundary, prevents
+    frameshift errors. Each coordinate is first raised until it is
+    non-negative and then lowered until it no longer exceeds
+    ``parent_length``.
+
+    :return: ``(start, end)`` after shifting
+    """
+
+    def _shift_by_codons(position):
+        while position < 0:
+            position += 3
+        while position > parent_length:
+            position -= 3
+        return position
+
+    return (_shift_by_codons(start), _shift_by_codons(end))
+
+
+def coding_genes(feature_list):
+    """Yield the gene features that have at least one CDS among their descendants.
+
+    :param feature_list: iterable of features, searched with ``genes``
+    """
+    for gene in genes(feature_list):
+        # Materialise the whole search, exactly like a len(list(...)) check
+        # would, so every descendant is visited before the decision is made.
+        cds_hits = list(
+            feature_lambda(
+                gene.sub_features,
+                feature_test_type,
+                {"type": "CDS"},
+                subfeatures=False,
+            )
+        )
+        if cds_hits:
+            yield gene
+
+
+def genes(feature_list, feature_type="gene", sort=False):
+    """
+    Simple filter to extract features of one type (genes by default) from the
+    feature set, searching the whole feature tree.
+
+    :param feature_type: type name to match (case-insensitive)
+    :param sort: when True the matches are yielded ordered by
+                 ``location.start`` instead of in traversal order
+    """
+    matches = feature_lambda(
+        feature_list, feature_test_type, {"type": feature_type}, subfeatures=True
+    )
+    if sort:
+        matches = sorted(matches, key=lambda feature: feature.location.start)
+    for match in matches:
+        yield match
+
+
+def wa_unified_product_name(feature):
+    """
+    Work out a product name for ``feature``.
+
+    Annotators were given conflicting instructions, so the name may live in
+    'product', in 'Product', or in 'Name'. The first value of 'product' is
+    preferred, then the first value of 'Product'. 'Name' is only used when it
+    does not look like a UUID.
+
+    :return: the product name, or None when nothing suitable was found
+    """
+    quals = feature.qualifiers
+
+    # Manually applied tags.
+    capitalised = quals.get("Product", [None])
+    protein_product = quals.get("product", capitalised)[0]
+    if protein_product is not None:
+        return protein_product
+
+    # Neither tag is available: fall back to a non-UUID name, if any.
+    if "Name" in quals and not is_uuid(quals["Name"][0]):
+        return quals["Name"][0]
+
+    return protein_product
+
+
+def is_uuid(name):
+    """Cheap UUID-shape check: exactly four dashes and a total length of 36.
+
+    No other validation is done, so any 36-character string with four dashes
+    passes.
+    """
+    dash_total = name.count("-")
+    return dash_total == 4 and len(name) == 36
+
+
+def get_rbs_from(gene):
+    """Collect the ribosome-binding-site features found beneath ``gene``.
+
+    Four annotation styles are recognised, and the result concatenates them
+    in this order: features of type RBS, features of type
+    Shine_Dalgarno_sequence, ``regulatory`` features whose
+    ``regulatory_class`` qualifier contains "ribosome_binding_site", and
+    exon features shorter than 10.
+
+    :return: list of copied features (each with an empty sub_features list)
+    """
+
+    def _descendants_of_type(type_name):
+        return list(
+            feature_lambda(
+                gene.sub_features,
+                feature_test_type,
+                {"type": type_name},
+                subfeatures=False,
+            )
+        )
+
+    # Normal RBS annotation types
+    rbs_rbs = _descendants_of_type("RBS")
+    rbs_sds = _descendants_of_type("Shine_Dalgarno_sequence")
+
+    # Apollo models these as very short exons.
+    short_exons = [exon for exon in _descendants_of_type("exon") if len(exon) < 10]
+
+    # These are more NCBI's style
+    regulatory_elements = _descendants_of_type("regulatory")
+    rbs_regulatory = list(
+        feature_lambda(
+            regulatory_elements,
+            feature_test_quals,
+            {"regulatory_class": ["ribosome_binding_site"]},
+            subfeatures=False,
+        )
+    )
+
+    # Ideally exactly one entry comes back, but nothing enforces that.
+    return rbs_rbs + rbs_sds + rbs_regulatory + short_exons
+
+
+def nice_name(record):
+    """
+    Return a readable name for ``record`` instead of an NCBI-style ID.
+
+    When the record has exactly one feature of type "contig", the first
+    value of that feature's ``organism`` qualifier is used. In every other
+    case, including a contig without that qualifier, ``record.id`` is
+    returned.
+    """
+    contigs = list(genes(record.features, feature_type="contig"))
+    if len(contigs) != 1:
+        return record.id
+    return contigs[0].qualifiers.get("organism", [record.id])[0]
+
+
+def fsort(it):
+    """Yield the features of ``it`` ordered by the integer value of ``location.start``."""
+
+    def _start_of(feature):
+        return int(feature.location.start)
+
+    ordered = sorted(it, key=_start_of)
+    for feature in ordered:
+        yield feature
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_phageqc_annotation/macros.xml	Fri Jun 17 13:00:50 2022 +0000
@@ -0,0 +1,23 @@
+<?xml version="1.0"?>
+<macros>
+  <!-- Package requirements shared by the tool wrappers (python 3.8.13, biopython 1.79, cpt_gffparser 1.2.2); the yield lets a wrapper append its own. -->
+  <xml name="requirements">
+    <requirements>
+		<requirement type="package" version="3.8.13">python</requirement>
+		<requirement type="package" version="1.79">biopython</requirement>
+		<requirement type="package" version="1.2.2">cpt_gffparser</requirement>  
+		<yield/>
+    </requirements>
+  </xml>
+  <!-- Input parameter "genome_fasta": a FASTA dataset. -->
+  <xml name="genome_selector">
+	    <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
+  </xml>
+  <!-- Input parameter "gff3_data": a GFF3 dataset. -->
+  <xml name="gff3_input">
+    <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
+  </xml>
+  <!-- Command fragment that symlinks the selected FASTA dataset to genomeref.fa in the working directory. -->
+  <token name="@GENOME_SELECTOR_PRE@">
+		ln -s $genome_fasta genomeref.fa;
+	</token>
+	<!-- File name created by @GENOME_SELECTOR_PRE@, for use as a command argument. -->
+	<token name="@GENOME_SELECTOR@">
+		genomeref.fa
+	</token>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_phageqc_annotation/phage_annotation_validator.py	Fri Jun 17 13:00:50 2022 +0000
@@ -0,0 +1,1254 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# vim: set fileencoding=utf-8
+import os
+import sys
+import json
+import math
+import numpy
+import argparse
+import itertools
+import logging
+from gff3 import (
+    feature_lambda,
+    coding_genes,
+    genes,
+    get_gff3_id,
+    feature_test_location,
+    get_rbs_from,
+    nice_name,
+)
+from shinefind import NaiveSDCaller
+from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature
+from Bio import SeqIO
+from Bio.SeqRecord import SeqRecord
+from Bio.SeqFeature import SeqFeature, FeatureLocation
+from jinja2 import Environment, FileSystemLoader
+from cpt import MGAFinder
+
+# Configure the root logger to emit DEBUG messages and above.
+logging.basicConfig(level=logging.DEBUG)
+# Logger used by the checks in this script.
+log = logging.getLogger(name="pav")
+
+# Path to script, required because of Galaxy.
+SCRIPT_PATH = os.path.dirname(os.path.realpath(__file__))
+# Path to the HTML template for the report
+
+# (score threshold, HTML message) pairs, listed from the highest threshold to
+# the lowest. get_encouragement() walks these in order and compares the score
+# against each threshold. The messages contain raw HTML markup.
+ENCOURAGEMENT = (
+    (100, "Perfection itself!"),
+    (90, "Amazing!"),
+    (80, "Not too bad, a few minor things to fix..."),
+    (70, "Some issues to address"),
+    (
+        50,
+        """Issues detected! </p><p class="text-muted">Have you heard of the
+     <a href="https://cpt.tamu.edu">CPT</a>\'s Automated Phage Annotation
+     Pipeline?""",
+    ),
+    (
+        0,
+        """<b>MAJOR</b> issues detected! Please consider using the
+     <a href="https://cpt.tamu.edu">CPT</a>\'s Automated Phage Annotation Pipeline""",
+    ),
+)
+
+
+def gen_qc_feature(start, end, message, strand=0, id_src=None, type_src="gene"):
+    kwargs = {"qualifiers": {"note": [message]}}
+    kwargs["type"] = type_src
+    kwargs["strand"] = strand
+    kwargs["phase"]=0
+    kwargs["score"]=0.0
+    kwargs["source"]="feature"
+    if id_src is not None:
+        kwargs["id"] = id_src.id
+        kwargs["qualifiers"]["ID"] = [id_src.id]
+        kwargs["qualifiers"]["Name"] = id_src.qualifiers.get("Name", [])
+	
+
+    if end >= start:
+        return gffSeqFeature(FeatureLocation(start, end, strand=strand), **kwargs)
+    else:
+        return gffSeqFeature(FeatureLocation(end, start, strand=strand), **kwargs)
+
+
+def __ensure_location_in_bounds(start=0, end=0, parent_length=0):
+    # This prevents frameshift errors
+    while start < 0:
+        start += 3
+    while end < 0:
+        end += 3
+    while start > parent_length:
+        start -= 3
+    while end > parent_length:
+        end -= 3
+    return (start, end)
+
+
+def missing_rbs(record, lookahead_min=5, lookahead_max=15):
+    """
+    Identify gene features with missing RBSs
+
+    This "looks ahead" 5-15 bases ahead of each gene feature, and checks if
+    there's an RBS feature in those bounds.
+
+    The returned data is a set of genes with the RBS sequence in the __upstream
+    attribute, and a message in the __message attribute.
+    """
+    results = []
+    good = 0
+    bad = 0
+    qc_features = []
+    sd_finder = NaiveSDCaller()
+
+    any_rbss = False
+
+    for gene in coding_genes(record.features):
+        # Check if there are RBSs, TODO: make this recursive. Each feature in
+        # gene.sub_features can also have sub_features.
+        rbss = get_rbs_from(gene)
+        # No RBS found
+        if len(rbss) == 0:
+            # Get the sequence lookahead_min to lookahead_max upstream
+            if gene.strand > 0:
+                start = gene.location.start - lookahead_max
+                end = gene.location.start - lookahead_min
+            else:
+                start = gene.location.end + lookahead_min
+                end = gene.location.end + lookahead_max
+            # We have to ensure the feature is ON the genome, otherwise we may
+            # be trying to access a location outside of the length of the
+            # genome, which would be bad.
+            (start, end) = __ensure_location_in_bounds(
+                start=start, end=end, parent_length=len(record)
+            )
+            # Temporary feature to extract sequence
+            tmp = gffSeqFeature(
+                FeatureLocation(start, end, strand=gene.strand), type="domain"
+            )
+            # Get the sequence
+            seq = str(tmp.extract(record.seq))
+            # Set the default properties
+            gene.__upstream = seq.lower()
+            gene.__message = "No RBS annotated, None found"
+
+            # Try and do an automated shinefind call
+            sds = sd_finder.list_sds(seq)
+            if len(sds) > 0:
+                sd = sds[0]
+                gene.__upstream = sd_finder.highlight_sd(
+                    seq.lower(), sd["start"], sd["end"]
+                )
+                gene.__message = "Unannotated but valid RBS"
+
+            qc_features.append(
+                gen_qc_feature(
+                    start, end, "Missing RBS", strand=gene.strand, id_src=gene, type_src="gene"
+                )
+            )
+
+            bad += 1
+            results.append(gene)
+            results[-1].location = FeatureLocation(results[-1].location.start + 1, results[-1].location.end, results[-1].location.strand)
+        else:
+            if len(rbss) > 1:
+                log.warn("%s RBSs found for gene %s", rbss[0].id, get_gff3_id(gene))
+            any_rbss = True
+            # get first RBS/CDS
+            cds = list(genes(gene.sub_features, feature_type="CDS"))[0]
+            rbs = rbss[0]
+
+            # Get the distance between the two
+            if gene.strand > 0:
+                distance = cds.location.start - rbs.location.end
+            else:
+                distance = rbs.location.start - cds.location.end
+
+            # If the RBS is too far away, annotate that
+            if distance > lookahead_max:
+                gene.__message = "RBS too far away (%s nt)" % distance
+
+                qc_features.append(
+                    gen_qc_feature(
+                        rbs.location.start,
+                        rbs.location.end,
+                        gene.__message,
+                        strand=gene.strand,
+                        id_src=gene,
+                        type_src="gene"
+                    )
+                )
+
+                bad += 1
+                results.append(gene)
+                results[-1].location = FeatureLocation(results[-1].location.start + 1, results[-1].location.end, results[-1].location.strand)
+            else:
+                good += 1
+
+    return good, bad, results, qc_features, any_rbss
+
+
+# modified from get_orfs_or_cdss.py
+# -----------------------------------------------------------
+
+
+def require_sd(data, record, chrom_start, sd_min, sd_max):
+    sd_finder = NaiveSDCaller()
+    for putative_gene in data:
+        if putative_gene[2] > 0:  # strand
+            start = chrom_start + putative_gene[0] - sd_max
+            end = chrom_start + putative_gene[0] - sd_min
+        else:
+            start = chrom_start + putative_gene[1] + sd_min
+            end = chrom_start + putative_gene[1] + sd_max
+
+        (start, end) = __ensure_location_in_bounds(
+            start=start, end=end, parent_length=len(record)
+        )
+        tmp = gffSeqFeature(
+            FeatureLocation(start, end, strand=putative_gene[2]), type="domain"
+        )
+        # Get the sequence
+        seq = str(tmp.extract(record.seq))
+        sds = sd_finder.list_sds(seq)
+        if len(sds) > 0:
+            yield putative_gene + (start, end)
+
+
+def excessive_gap(
+    record,
+    excess=50,
+    excess_divergent=200,
+    min_gene=30,
+    slop=30,
+    lookahead_min=5,
+    lookahead_max=15,
+):
+    """
+    Identify excessive gaps between gene features.
+
+    Default "excessive" gap size is 10, but that should likely be larger.
+    """
+    results = []
+    good = 0
+    bad = 0
+
+    contiguous_regions = []
+
+    sorted_genes = sorted(
+        genes(record.features), key=lambda feature: feature.location.start
+    )
+    if len(sorted_genes) == 0:
+        log.warn("NO GENES FOUND")
+        return good, bad, results, []
+
+    current_gene = None
+    for gene in sorted_genes:
+        # If the gene's start is contiguous to the "current_gene", then we
+        # extend current_gene
+        for cds in genes(gene.sub_features, feature_type="CDS"):
+            if current_gene is None:
+                current_gene = [int(cds.location.start), int(cds.location.end)]
+
+            if cds.location.start <= current_gene[1] + excess:
+                # Don't want to decrease size
+                if int(cds.location.end) >= current_gene[1]:
+                    current_gene[1] = int(cds.location.end)
+            else:
+                # If it's discontiguous, we append the region and clear.
+                contiguous_regions.append(current_gene)
+                current_gene = [int(cds.location.start), int(cds.location.end)]
+
+    # This generally expected that annotations would NOT continue unto the end
+    # of the genome, however that's a bug, and we can make it here with an
+    # empty contiguous_regions list
+    contiguous_regions.append(current_gene)
+
+    for i in range(len(contiguous_regions) + 1):
+        if i == 0:
+            a = (1, 1)
+            b = contiguous_regions[i]
+        elif i >= len(contiguous_regions):
+            a = contiguous_regions[i - 1]
+            b = (len(record.seq), None)
+        else:
+            a = contiguous_regions[i - 1]
+            b = contiguous_regions[i]
+
+        gap_size = abs(b[0] - a[1])
+        
+        if gap_size > min(excess, excess_divergent):
+            a_feat_l = itertools.islice(
+                feature_lambda(
+                    sorted_genes,
+                    feature_test_location,
+                    {"loc": a[1]},
+                    subfeatures=False,
+                ),
+                1,
+            )
+            b_feat_l = itertools.islice(
+                feature_lambda(
+                    sorted_genes,
+                    feature_test_location,
+                    {"loc": b[0]},
+                    subfeatures=False,
+                ),
+                1,
+            )
+
+            try:
+                a_feat = next(a_feat_l)
+            except StopIteration:
+                # Triggers on end of genome
+                a_feat = None
+            try:
+                b_feat = next(b_feat_l)
+            except StopIteration:
+                # Triggers on end of genome
+                b_feat = None
+
+            result_obj = [
+                a[1],
+                b[0],
+                None if not a_feat else a_feat.location.strand,
+                None if not b_feat else b_feat.location.strand,
+            ]
+
+            if a_feat is None or b_feat is None:
+                if gap_size > excess_divergent:
+                    results.append(result_obj)
+            else:
+                if (
+                    a_feat.location.strand == b_feat.location.strand
+                    and gap_size > excess
+                ):
+                    results.append(result_obj)
+                elif (
+                    a_feat.location.strand != b_feat.location.strand
+                    and gap_size > excess_divergent
+                ):
+                    results.append(result_obj)
+
+    better_results = []
+    qc_features = []
+    of = MGAFinder(11, "CDS", "closed", min_gene)
+    # of = OrfFinder(11, 'CDS', 'closed', min_gene)
+
+    for result_obj in results:
+        start = result_obj[0]
+        end = result_obj[1]
+        f = gen_qc_feature(start, end, "Excessive gap, %s bases" % abs(end - start), type_src="gene")
+        qc_features.append(f)
+        putative_genes = of.putative_genes_in_sequence(
+            str(record[start - slop : end + slop].seq)
+        )
+        putative_genes = list(
+            require_sd(putative_genes, record, start, lookahead_min, lookahead_max)
+        )
+        for putative_gene in putative_genes:
+            # (0, 33, 1, 'ATTATTTTATCAAAACGCTTTACAATCTTTTAG', 'MILSKRFTIF', 123123, 124324)
+            possible_gene_start = start + putative_gene[0]
+            possible_gene_end = start + putative_gene[1]
+
+            if possible_gene_start <= possible_gene_end:
+                possible_cds = gffSeqFeature(
+                    FeatureLocation(
+                        possible_gene_start, possible_gene_end, strand=putative_gene[2]
+                    ),
+                    type="CDS",
+                )
+            else:
+                possible_cds = gffSeqFeature(
+                    FeatureLocation(
+                        possible_gene_end, possible_gene_start, strand=putative_gene[2],
+                    ),
+                    type="CDS",
+                )
+
+            # Now we adjust our boundaries for the RBS that's required
+            # There are only two cases, the rbs is upstream of it, or downstream
+            if putative_gene[5] < possible_gene_start:
+                possible_gene_start = putative_gene[5]
+            else:
+                possible_gene_end = putative_gene[6]
+
+            if putative_gene[5] <= putative_gene[6]:
+                possible_rbs = gffSeqFeature(
+                    FeatureLocation(
+                        putative_gene[5], putative_gene[6], strand=putative_gene[2]
+                    ),
+                    type="Shine_Dalgarno_sequence",
+                )
+            else:
+                possible_rbs = gffSeqFeature(
+                    FeatureLocation(
+                        putative_gene[6], putative_gene[5], strand=putative_gene[2],
+                    ),
+                    type="Shine_Dalgarno_sequence",
+                )
+
+            if possible_gene_start <= possible_gene_end:
+                possible_gene = gffSeqFeature(
+                    FeatureLocation(
+                        possible_gene_start, possible_gene_end, strand=putative_gene[2]
+                    ),
+                    type="gene",
+                    qualifiers={"note": ["Possible gene"]},
+                )
+            else:
+                possible_gene = gffSeqFeature(
+                    FeatureLocation(
+                        possible_gene_end, possible_gene_start, strand=putative_gene[2],
+                    ),
+                    type="gene",
+                    qualifiers={"note": ["Possible gene"]},
+                )
+            possible_gene.sub_features = [possible_rbs, possible_cds]
+            qc_features.append(possible_gene)
+
+        better_results.append(result_obj + [len(putative_genes)])
+
+    # Bad gaps are those with more than zero possible genes found
+    bad = len([x for x in better_results if x[2] > 0])
+    # Generally taking "good" here as every possible gap in the genome
+    # Thus, good is TOTAL - gaps
+    good = len(sorted_genes) + 1 - bad
+    # and bad is just gaps
+    return good, bad, better_results, qc_features
+
+
+def phi(x):
+    """Standard phi function used in calculation of normal distribution"""
+    return math.exp(-1 * math.pi * x * x)
+
+
+def norm(x, mean=0, sd=1):
+    """
+    Normal distribution. Given an x position, a mean, and a standard
+    deviation, calculate the "y" value. Useful for score scaling
+
+    Modified to multiply by SD. This means even at sd=5, norm(x, mean) where x = mean => 1, rather than 1/5.
+    """
+    return (1 / float(sd)) * phi(float(x - mean) / float(sd)) * sd
+
+
+def coding_density(record, mean=92.5, sd=20):
+    """
+    Find coding density in the genome
+    """
+    feature_lengths = 0
+
+    for gene_a in coding_genes(record.features):
+        feature_lengths += sum(
+            [len(x) for x in genes(gene_a.sub_features, feature_type="CDS")]
+        )
+
+    avgFeatLen = float(feature_lengths) / float(len(record.seq))
+    return int(norm(100 * avgFeatLen, mean=mean, sd=sd) * 100), int(100 * avgFeatLen)
+
+
+def exact_coding_density(record, mean=92.5, sd=20):
+    """
+    Find exact coding density in the genome
+    """
+    data = numpy.zeros(len(record.seq))
+
+    for gene_a in coding_genes(record.features):
+        for cds in genes(gene_a.sub_features, feature_type="CDS"):
+            for i in range(cds.location.start, cds.location.end + 1):
+                data[i - 1] = 1
+
+    return float(sum(data)) / len(data)
+
+
+def excessive_overlap(record, excess=15, excess_divergent=30):
+    """
+    Find excessive overlaps in the genome, where excessive is defined as 15
+    bases for same strand, and 30 for divergent translation.
+
+    Does a product of all the top-level features in the genome, and calculates
+    gaps.
+    """
+    results = []
+    bad = 0
+    qc_features = []
+
+    for (gene_a, gene_b) in itertools.combinations(coding_genes(record.features), 2):
+        # Get the CDS from the subfeature list.
+        # TODO: not recursive.
+        cds_a = [x for x in genes(gene_a.sub_features, feature_type="CDS")]
+        cds_b = [x for x in genes(gene_b.sub_features, feature_type="CDS")]
+
+        if len(cds_a) == 0:
+            log.warn("Gene missing subfeatures; %s", get_gff3_id(gene_a))
+            continue
+
+        if len(cds_b) == 0:
+            log.warn("Gene missing subfeatures; %s", get_gff3_id(gene_b))
+            continue
+
+        cds_a = cds_a[0]
+        cds_b = cds_b[0]
+
+        # Set of locations that are included in the CDS of A and the
+        # CDS of B
+        cas = set(range(cds_a.location.start, cds_a.location.end))
+        cbs = set(range(cds_b.location.start, cds_b.location.end))
+
+        # Here we calculate the intersection between the two sets, and
+        # if it's larger than our excessive size, we know that they're
+        # overlapped
+        ix = cas.intersection(cbs)
+
+        if (cds_a.location.strand == cds_b.location.strand and len(ix) >= excess) or (
+            cds_a.location.strand != cds_b.location.strand
+            and len(ix) >= excess_divergent
+        ):
+            bad += float(len(ix)) / float(min(excess, excess_divergent))
+            qc_features.append(
+                gen_qc_feature(min(ix), max(ix), "Excessive Overlap", id_src=gene_a, type_src="gene")
+            )
+            results.append((gene_a, gene_b, min(ix), max(ix)))
+
+    # Good isn't accurate here. It's a triangle number and just ugly, but we
+    # don't care enough to fix it.
+    good = len(list(coding_genes(record.features)))
+    good = int(good - bad)
+    if good < 0:
+        good = 0
+    return good, int(bad), results, qc_features
+
+
+def get_encouragement(score):
+    """Some text telling the user how they did
+    """
+    for encouragement in ENCOURAGEMENT:
+        if score > encouragement[0]:
+            return encouragement[1]
+    return ENCOURAGEMENT[-1][1]
+
+
+def genome_overview(record):
+    """Genome overview
+    """
+    data = {
+        "genes": {
+            "count": 0,
+            "bases": len(record.seq),
+            "density": 0,  # genes / kb
+            "avg_len": [],
+            "comp": {"A": 0, "C": 0, "G": 0, "T": 0},
+        },
+        "overall": {
+            "comp": {
+                "A": record.seq.count("A") + record.seq.count("a"),
+                "C": record.seq.count("C") + record.seq.count("c"),
+                "G": record.seq.count("G") + record.seq.count("g"),
+                "T": record.seq.count("T") + record.seq.count("t"),
+            },
+            "gc": 0,
+        },
+    }
+    gene_features = list(coding_genes(record.features))
+    data["genes"]["count"] = len(gene_features)
+
+    for feat in gene_features:
+        data["genes"]["comp"]["A"] += feat.extract(record).seq.count("A") + feat.extract(record).seq.count("a")
+        data["genes"]["comp"]["C"] += feat.extract(record).seq.count("C") + feat.extract(record).seq.count("c")
+        data["genes"]["comp"]["T"] += feat.extract(record).seq.count("T") + feat.extract(record).seq.count("t")
+        data["genes"]["comp"]["G"] += feat.extract(record).seq.count("G") + feat.extract(record).seq.count("g")
+        #data["genes"]["bases"] += len(feat)
+        data["genes"]["avg_len"].append(len(feat))
+
+    data["genes"]["avg_len"] = float(sum(data["genes"]["avg_len"])) / len(gene_features)
+    data["overall"]["gc"] = float(
+        data["overall"]["comp"]["G"] + data["overall"]["comp"]["C"]
+    ) / len(record.seq)
+    return data
+
+
+def find_morons(record):
+    """Locate morons in the genome
+
+    Don't even know why...
+
+    TODO: remove? Idk.
+    """
+    results = []
+    good = 0
+    bad = 0
+
+    gene_features = list(coding_genes(record.features))
+    for i, gene in enumerate(gene_features):
+        two_left = gene_features[i - 2 : i]
+        two_right = gene_features[i + 1 : i + 1 + 2]
+        strands = [x.strand for x in two_left] + [x.strand for x in two_right]
+        anticon = [x for x in strands if x != gene.strand]
+
+        if len(anticon) == 4:
+            has_rbs = [x.type == "Shine_Dalgarno_sequence" for x in gene.sub_features]
+            if any(has_rbs):
+                rbs = [
+                    x for x in gene.sub_features if x.type == "Shine_Dalgarno_sequence"
+                ][0]
+                rbs_msg = str(rbs.extract(record.seq))
+            else:
+                rbs_msg = "No RBS Available"
+            results.append((gene, two_left, two_right, rbs_msg))
+            bad += 1
+        else:
+            good += 1
+    return good, bad, results, []
+
+
+def bad_gene_model(record):
+    """Find features without product
+    """
+    results = []
+    good = 0
+    bad = 0
+    qc_features = []
+
+    for gene in coding_genes(record.features):
+        exons = [
+            x for x in genes(gene.sub_features, feature_type="exon") if len(x) > 10
+        ]
+        CDSs = [x for x in genes(gene.sub_features, feature_type="CDS")]
+        if len(exons) >= 1 and len(CDSs) >= 1:
+            if len(exons) != len(CDSs):
+                results.append(
+                    (
+                        get_gff3_id(gene),
+                        None,
+                        None,
+                        "Mismatched number of exons and CDSs in gff3 representation",
+                    )
+                )
+                qc_features.append(
+                    gen_qc_feature(
+                        gene.location.start,
+                        gene.location.end,
+                        "Mismatched number of exons and CDSs in gff3 representation",
+                        strand=gene.strand,
+                        id_src=gene, 
+                        type_src="gene"
+                    )
+                )
+                bad += 1
+            else:
+                for (exon, cds) in zip(
+                    sorted(exons, key=lambda x: x.location.start),
+                    sorted(CDSs, key=lambda x: x.location.start),
+                ):
+                    if len(exon) != len(cds):
+                        results.append(
+                            (
+                                get_gff3_id(gene),
+                                exon,
+                                cds,
+                                "CDS does not extend to full length of gene",
+                            )
+                        )
+                        qc_features.append(
+                            gen_qc_feature(
+                                exon.location.start,
+                                exon.location.end,
+                                "CDS does not extend to full length of gene",
+                                strand=exon.strand,
+                                id_src=gene, 
+                                type_src="CDS"
+                            )
+                        )
+                        bad += 1
+                    else:
+                        good += 1
+        else:
+            log.warn("Could not handle %s, %s", exons, CDSs)
+            results.append(
+                (
+                    get_gff3_id(gene),
+                    None,
+                    None,
+                    "{0} exons, {1} CDSs".format(len(exons), len(CDSs)),
+                )
+            )
+
+    return good, len(results) + bad, results, qc_features
+
+
+def weird_starts(record):
+    """Find features without product
+    """
+    good = 0
+    bad = 0
+    qc_features = []
+    results = []
+
+    overall = {}
+    for gene in coding_genes(record.features):
+        seq = [x for x in genes(gene.sub_features, feature_type="CDS")]
+        if len(seq) == 0:
+            log.warn("No CDS for gene %s", get_gff3_id(gene))
+            continue
+        else:
+            seq = seq[0]
+
+        seq_str = str(seq.extract(record.seq))
+        start_codon = seq_str[0:3]
+        if len(seq_str) < 3:
+            sys.stderr.write("Fatal Error: CDS of length less than 3 at " + str(seq.location) + '\n')
+            exit(2)
+#        if len(seq_str) % 3 != 0:
+#            if len(seq_str) < 3:
+#                stop_codon = seq_str[-(len(seq_str))]
+#            else:
+#                stop_codon = seq_str[-3]
+#            
+#            log.warn("CDS at %s length is not a multiple of three (Length = %d)", get_gff3_id(gene), len(seq_str))
+#            seq.__error = "Bad CDS Length"
+#            results.append(seq)
+#            qc_features.append(
+#                gen_qc_feature(
+#                    s, e, "Bad Length", strand=seq.strand, id_src=gene
+#                )
+#            )
+#            bad += 1
+#            seq.__start = start_codon
+#            seq.__stop = stop_codon
+#            continue 
+
+        stop_codon = seq_str[-3]
+        seq.__start = start_codon
+        seq.__stop = stop_codon
+        if start_codon not in overall:
+            overall[start_codon] = 1
+        else:
+            overall[start_codon] += 1
+
+        if start_codon not in ("ATG", "TTG", "GTG"):
+            log.warn("Weird start codon (%s) on %s", start_codon, get_gff3_id(gene))
+            seq.__error = "Unusual start codon %s" % start_codon
+
+            s = 0
+            e = 0
+            if seq.strand > 0:
+                s = seq.location.start
+                e = seq.location.start + 3
+            else:
+                s = seq.location.end
+                e = seq.location.end - 3
+
+            results.append(seq)
+            results[-1].location = FeatureLocation(results[-1].location.start + 1, results[-1].location.end, results[-1].location.strand) 
+            qc_features.append(
+                gen_qc_feature(
+                    s, e, "Weird start codon", strand=seq.strand, id_src=gene, type_src="gene"
+                )
+            )
+            bad += 1
+        else:
+            good += 1
+
+    return good, bad, results, qc_features, overall
+
+
+def missing_genes(record):
+    """Find features without product
+    """
+    results = []
+    good = 0
+    bad = 0
+    qc_features = []
+
+    for gene in coding_genes(record.features):
+        if gene.qualifiers.get("cpt_source", [None])[0] == "CPT_GENE_MODEL_CORRECTION":
+            results.append(gene)
+            bad += 1
+        else:
+            good += 1
+
+    return good, bad, results, qc_features
+
+
+def gene_model_correction_issues(record):
+    """Find features that have issues from the gene model correction step.
+    These have qualifiers beginning with CPT_GMS
+    """
+    results = []
+    good = 0
+    bad = 0
+    qc_features = []
+
+    # For each gene
+    for gene in coding_genes(record.features):
+        # Get the list of child CDSs
+        cdss = [x for x in genes(gene.sub_features, feature_type="CDS")]
+        # And our matching qualifiers
+        gene_data = [(k, v) for (k, v) in gene.qualifiers.items() if k == "cpt_gmc"]
+        # If there are problems with ONLY the parent, let's complain
+        local_results = []
+        local_qc_features = []
+        for x in gene_data:
+            if "Missing Locus Tag" in x[1]:
+                # Missing locus tag is an either or thing, if it hits here
+                # there shouldn't be anything else wrong with it.
+
+                # Obviously missing so we remove it
+                gene.qualifiers["locus_tag"] = [""]
+                # Translation from bp_genbank2gff3.py
+                cdss[0].qualifiers["locus_tag"] = cdss[0].qualifiers["Name"]
+                # Append our results
+                local_results.append((gene, cdss[0], "Gene is missing a locus_tag"))
+                local_qc_features.append(
+                    gen_qc_feature(
+                        gene.location.start,
+                        gene.location.end,
+                        "Gene is missing a locus_tag",
+                        strand=gene.strand, 
+                        type_src="gene"
+                    )
+                )
+
+        # We need to alert on any child issues as well.
+        for cds in cdss:
+            cds_data = [
+                (k, v[0]) for (k, v) in cds.qualifiers.items() if k == "cpt_gmc"
+            ]
+            if len(gene_data) == 0 and len(cds_data) == 0:
+                # Alles gut
+                pass
+            else:
+                for _, problem in cds_data:
+                    if problem == "BOTH Missing Locus Tag":
+                        gene.qualifiers["locus_tag"] = [""]
+                        cds.qualifiers["locus_tag"] = [""]
+                        local_results.append(
+                            (gene, cds, "Both gene and CDS are missing locus tags")
+                        )
+                        local_qc_features.append(
+                            gen_qc_feature(
+                                cds.location.start,
+                                cds.location.end,
+                                "CDS is missing a locus_tag",
+                                strand=cds.strand, 
+                                type_src="CDS"
+                            )
+                        )
+                        local_qc_features.append(
+                            gen_qc_feature(
+                                gene.location.start,
+                                gene.location.end,
+                                "Gene is missing a locus_tag",
+                                strand=gene.strand, 
+                                type_src="gene"
+                            )
+                        )
+                    elif problem == "Different locus tag from associated gene.":
+                        gene.qualifiers["locus_tag"] = gene.qualifiers["Name"]
+                        cds.qualifiers["locus_tag"] = cds.qualifiers["cpt_gmc_locus"]
+                        local_results.append(
+                            (gene, cds, "Gene and CDS have differing locus tags")
+                        )
+                        local_qc_features.append(
+                            gen_qc_feature(
+                                gene.location.start,
+                                gene.location.end,
+                                "Gene and CDS have differing locus tags",
+                                strand=gene.strand, 
+                                type_src="gene"
+                            )
+                        )
+                    elif problem == "Missing Locus Tag":
+                        # Copy this over
+                        gene.qualifiers["locus_tag"] = gene.qualifiers["Name"]
+                        # This one is missing
+                        cds.qualifiers["locus_tag"] = [""]
+                        local_results.append((gene, cds, "CDS is missing a locus_tag"))
+                        local_qc_features.append(
+                            gen_qc_feature(
+                                cds.location.start,
+                                cds.location.end,
+                                "CDS is missing a locus_tag",
+                                strand=cds.strand, 
+                                type_src="CDS"
+                            )
+                        )
+                    else:
+                        log.warn("Cannot handle %s", problem)
+
+        if len(local_results) > 0:
+            bad += 1
+        else:
+            good += 1
+
+        qc_features.extend(local_qc_features)
+        results.extend(local_results)
+    return good, bad, results, qc_features
+
+
+def missing_tags(record):
+    """Find features without product
+    """
+    results = []
+    good = 0
+    bad = 0
+    qc_features = []
+
+    for gene in coding_genes(record.features):
+        cds = [x for x in genes(gene.sub_features, feature_type="CDS")]
+        if len(cds) == 0:
+            log.warn("Gene missing CDS subfeature %s", get_gff3_id(gene))
+            continue
+
+        cds = cds[0]
+
+        if "product" not in cds.qualifiers:
+            log.info("Missing product tag on %s", get_gff3_id(gene))
+            qc_features.append(
+                gen_qc_feature(
+                    cds.location.start,
+                    cds.location.end,
+                    "Missing product tag",
+                    strand=cds.strand,
+                    type_src="CDS"
+                )
+            )
+            results.append(cds)
+            bad += 1
+        else:
+            good += 1
+
+    return good, bad, results, qc_features
+
+
+def evaluate_and_report(
+    annotations,
+    genome,
+    gff3=None,
+    tbl=None,
+    sd_min=5,
+    sd_max=15,
+    min_gene_length=30,
+    excessive_gap_dist=50,
+    excessive_gap_divergent_dist=200,
+    excessive_overlap_dist=25,
+    excessive_overlap_divergent_dist=50,
+    reportTemplateName="phage_annotation_validator.html",
+):
+    """
+    Generate our HTML evaluation of the genome
+    """
+    # Get features from GFF file
+    seq_dict = SeqIO.to_dict(SeqIO.parse(genome, "fasta"))
+    # Get the first GFF3 record
+    # TODO: support multiple GFF3 files.
+    mostFeat = 0
+    for rec in list(gffParse(annotations, base_dict=seq_dict)):
+      if len(rec.features) > mostFeat:
+        mostFeat = len(rec.features)
+        record = rec
+
+    gff3_qc_record = SeqRecord(record.id, id=record.id)
+    gff3_qc_record.features = []
+    gff3_qc_features = []
+
+    log.info("Locating missing RBSs")
+    # mb_any = "did they annotate ANY rbss? if so, take off from score."
+    mb_good, mb_bad, mb_results, mb_annotations, mb_any = missing_rbs(
+        record, lookahead_min=sd_min, lookahead_max=sd_max
+    )
+    gff3_qc_features += mb_annotations
+
+    log.info("Locating excessive gaps")
+    eg_good, eg_bad, eg_results, eg_annotations = excessive_gap(
+        record,
+        excess=excessive_gap_dist,
+        excess_divergent=excessive_gap_divergent_dist,
+        min_gene=min_gene_length,
+        slop=excessive_overlap_dist,
+        lookahead_min=sd_min,
+        lookahead_max=sd_max,
+    )
+    gff3_qc_features += eg_annotations
+
+    log.info("Locating excessive overlaps")
+    eo_good, eo_bad, eo_results, eo_annotations = excessive_overlap(
+        record,
+        excess=excessive_overlap_dist,
+        excess_divergent=excessive_overlap_divergent_dist,
+    )
+    gff3_qc_features += eo_annotations
+
+    log.info("Locating morons")
+    mo_good, mo_bad, mo_results, mo_annotations = find_morons(record)
+    gff3_qc_features += mo_annotations
+
+    log.info("Locating missing tags")
+    mt_good, mt_bad, mt_results, mt_annotations = missing_tags(record)
+    gff3_qc_features += mt_annotations
+
+    log.info("Locating missing gene features")
+    mg_good, mg_bad, mg_results, mg_annotations = missing_genes(record)
+    gff3_qc_features += mg_annotations
+
+    log.info("Determining coding density")
+    cd, cd_real = coding_density(record)
+
+    log.info("Locating weird starts")
+    ws_good, ws_bad, ws_results, ws_annotations, ws_overall = weird_starts(record)
+    gff3_qc_features += ws_annotations
+
+    log.info("Locating bad gene models")
+    gm_good, gm_bad, gm_results, gm_annotations = bad_gene_model(record)
+    if gm_good + gm_bad == 0:
+        gm_bad = 1
+
+    log.info("Locating more bad gene models")
+    gmc_good, gmc_bad, gmc_results, gmc_annotations = gene_model_correction_issues(
+        record
+    )
+    if gmc_good + gmc_bad == 0:
+        gmc_bad = 1
+
+    good_scores = [eg_good, eo_good, mt_good, ws_good, gm_good, gmc_good]
+    bad_scores = [eg_bad, eo_bad, mt_bad, ws_bad, gm_bad, gmc_bad]
+
+    # Only if they tried to annotate RBSs do we consider them.
+    if mb_any:
+        good_scores.append(mb_good)
+        bad_scores.append(mb_bad)
+    subscores = []
+
+    for (g, b) in zip(good_scores, bad_scores):
+        if g + b == 0:
+            s = 0
+        else:
+            s = int(100 * float(g) / (float(b) + float(g)))
+        subscores.append(s)
+    subscores.append(cd)
+
+    score = int(float(sum(subscores)) / float(len(subscores)))
+
+    # This is data that will go into our HTML template
+    kwargs = {
+        "upstream_min": sd_min,
+        "upstream_max": sd_max,
+        "record_name": record.id,
+        "record_nice_name": nice_name(record),
+        "params": {
+            "sd_min": sd_min,
+            "sd_max": sd_max,
+            "min_gene_length": min_gene_length,
+            "excessive_gap_dist": excessive_gap_dist,
+            "excessive_gap_divergent_dist": excessive_gap_divergent_dist,
+            "excessive_overlap_dist": excessive_overlap_dist,
+            "excessive_overlap_divergent_dist": excessive_overlap_divergent_dist,
+        },
+        "score": score,
+        "encouragement": get_encouragement(score),
+        "genome_overview": genome_overview(record),
+        "rbss_annotated": mb_any,
+        "missing_rbs": mb_results,
+        "missing_rbs_good": mb_good,
+        "missing_rbs_bad": mb_bad,
+        "missing_rbs_score": 0
+        if mb_good + mb_bad == 0
+        else (100 * mb_good / (mb_good + mb_bad)),
+        "excessive_gap": eg_results,
+        "excessive_gap_good": eg_good,
+        "excessive_gap_bad": eg_bad,
+        "excessive_gap_score": 0
+        if eo_good + eo_bad == 0
+        else (100 * eo_good / (eo_good + eo_bad)),
+        "excessive_overlap": eo_results,
+        "excessive_overlap_good": eo_good,
+        "excessive_overlap_bad": eo_bad,
+        "excessive_overlap_score": 0
+        if eo_good + eo_bad == 0
+        else (100 * eo_good / (eo_good + eo_bad)),
+        "morons": mo_results,
+        "morons_good": mo_good,
+        "morons_bad": mo_bad,
+        "morons_score": 0
+        if mo_good + mo_bad == 0
+        else (100 * mo_good / (mo_good + mo_bad)),
+        "missing_tags": mt_results,
+        "missing_tags_good": mt_good,
+        "missing_tags_bad": mt_bad,
+        "missing_tags_score": 0
+        if mt_good + mt_bad == 0
+        else (100 * mt_good / (mt_good + mt_bad)),
+        "missing_genes": mg_results,
+        "missing_genes_good": mg_good,
+        "missing_genes_bad": mg_bad,
+        "missing_genes_score": 0
+        if mg_good + mg_bad == 0
+        else (100 * mg_good / (mg_good + mg_bad)),
+        "weird_starts": ws_results,
+        "weird_starts_good": ws_good,
+        "weird_starts_bad": ws_bad,
+        "weird_starts_overall": ws_overall,
+        "weird_starts_overall_sorted_keys": sorted(
+            ws_overall, reverse=True, key=lambda x: ws_overall[x]
+        ),
+        "weird_starts_score": 0
+        if ws_good + ws_bad == 0
+        else (100 * ws_good / (ws_good + ws_bad)),
+        "gene_model": gm_results,
+        "gene_model_good": gm_good,
+        "gene_model_bad": gm_bad,
+        "gene_model_score": 0
+        if gm_good + gm_bad == 0
+        else (100 * gm_good / (gm_good + gm_bad)),
+        "gene_model_correction": gmc_results,
+        "gene_model_correction_good": gmc_good,
+        "gene_model_correction_bad": gmc_bad,
+        "gene_model_correction_score": 0
+        if gmc_good + gmc_bad == 0
+        else (100 * gmc_good / (gmc_good + gmc_bad)),
+        "coding_density": cd,
+        "coding_density_exact": exact_coding_density(record),
+        "coding_density_real": cd_real,
+        "coding_density_score": cd,
+    }
+
+    with open(tbl, "w") as handle:
+        kw_subset = {}
+        for key in kwargs:
+            if (
+                key in ("score", "record_name")
+                or "_good" in key
+                or "_bad" in key
+                or "_overall" in key
+            ):
+                kw_subset[key] = kwargs[key]
+        json.dump(kw_subset, handle)
+
+    with open(gff3, "w") as handle:
+        gff3_qc_record.features = gff3_qc_features
+        gff3_qc_record.annotations = {}
+        gffWrite([gff3_qc_record], handle)
+
+    def nice_strand(direction):
+        # It is somehow possible for whole gffSeqFeature objects to end up in here, apparently at the gene level
+        if "SeqFeature" in str(type(direction)):
+          direction = direction.location.strand
+        if direction > 0:
+            return "→"#.decode("utf-8")
+        else:
+            return "←"#.decode("utf-8")
+
+    def nice_strand_tex(direction):
+        if "SeqFeature" in str(type(direction)):
+          direction = direction.location.strand
+        if direction > 0:
+            return "$\\rightarrow$"
+        else:
+            return "$\\leftarrow$"
+
+    def texify(data):
+        return data.replace("_", "\\_").replace("$", "\\$")
+
+    def length(data):
+        return len(data)
+
+    def my_encode(data):
+        return str(data)#.encode("utf-8")
+
+    def my_decode(data):
+        # For production
+        return str(data)#.decode("utf-8")
+        # For local testing. No, I do not understand.
+        return str(data)#.encode("utf-8")).decode("utf-8")
+
+    env = Environment(
+        loader=FileSystemLoader(SCRIPT_PATH), trim_blocks=True, lstrip_blocks=True
+    )
+    env.filters.update(
+        {
+            "nice_id": get_gff3_id,
+            "nice_strand": nice_strand,
+            "nice_strand_tex": nice_strand_tex,
+            "texify": texify,
+            "length": length,
+            "encode": my_encode,
+            "decode": my_decode,
+        }
+    )
+    tpl = env.get_template(reportTemplateName)
+    return tpl.render(**kwargs)#.encode("utf-8")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="rebase gff3 features against parent locations", epilog=""
+    )
+    parser.add_argument(
+        "annotations", type=argparse.FileType("r"), help="Parent GFF3 annotations"
+    )
+    parser.add_argument("genome", type=argparse.FileType("r"), help="Genome Sequence")
+    parser.add_argument(
+        "--gff3", type=str, help="GFF3 Annotations", default="qc_annotations.gff3"
+    )
+    parser.add_argument(
+        "--tbl",
+        type=str,
+        help="Table for noninteractive parsing",
+        default="qc_results.json",
+    )
+
+    parser.add_argument(
+        "--sd_min",
+        type=int,
+        help="Minimum distance from gene start for an SD to be",
+        default=5,
+    )
+    parser.add_argument(
+        "--sd_max",
+        type=int,
+        help="Maximum distance from gene start for an SD to be",
+        default=15,
+    )
+
+    parser.add_argument(
+        "--min_gene_length",
+        type=int,
+        help="Minimum length for a putative gene call (AAs)",
+        default=30,
+    )
+
+    parser.add_argument(
+        "--excessive_overlap_dist",
+        type=int,
+        help="Excessive overlap for genes in same direction",
+        default=25,
+    )
+    parser.add_argument(
+        "--excessive_overlap_divergent_dist",
+        type=int,
+        help="Excessive overlap for genes in diff directions",
+        default=50,
+    )
+
+    parser.add_argument(
+        "--excessive_gap_dist",
+        type=int,
+        help="Maximum distance between two genes",
+        default=40,
+    )
+    parser.add_argument(
+        "--excessive_gap_divergent_dist",
+        type=int,
+        help="Maximum distance between two divergent genes",
+        default=200,
+    )
+
+    parser.add_argument(
+        "--reportTemplateName",
+        help="Report template file name",
+        default="phageqc_report_full.html",
+    )
+
+    args = parser.parse_args()
+
+    sys.stdout.write(evaluate_and_report(**vars(args)))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_phageqc_annotation/phage_annotation_validator.xml	Fri Jun 17 13:00:50 2022 +0000
@@ -0,0 +1,102 @@
+<?xml version="1.0"?>
+<tool id="edu.tamu.cpt2.phage.annotation_validator" name="Phage QC" version="1.9.0" profile="16.04">
+    <description>validate phage annotations</description>
+    <macros>
+      <import>macros.xml</import>
+		<import>cpt-macros.xml</import>
+    </macros>
+    <requirements>
+      <requirement type="package" version="3.6">python</requirement>
+      <requirement type="package" version="1.77">biopython</requirement>
+      <requirement type="package" version="1.1.7">cpt_gffparser</requirement>  
+      <requirement type="package" version="0.12.0">python-levenshtein</requirement>
+      <requirement type="package" version="2019.06.08">regex</requirement>
+      <requirement type="package">metagene_annotator</requirement>
+      <requirement type="package" version="2.10.1">jinja2</requirement>
+      <requirement type="package" version="1.11">numpy</requirement>
+    </requirements>
+    <command detect_errors="aggressive"><![CDATA[
+@GENOME_SELECTOR_PRE@
+
+python $__tool_directory__/phage_annotation_validator.py
+$gff3_data
+@GENOME_SELECTOR@
+
+--gff3 $gff3
+
+--sd_min $sd_min
+--sd_max $sd_max
+
+--min_gene_length $min_gene
+
+--excessive_overlap_dist $eod
+--excessive_overlap_divergent_dist $eodd
+
+--excessive_gap_dist $egd
+--excessive_gap_divergent_dist $egdd
+
+--reportTemplateName $report_format
+
+> $output;
+
+#if ".tex" in str($report_format):
+    mv $output tmp.tex;
+    docker run --rm -i --user="1002:1002" --net=none -v \$PWD:/data blang/latex pdflatex tmp.tex &&
+    docker run --rm -i --user="1002:1002" --net=none -v \$PWD:/data blang/latex pdflatex tmp.tex &&
+    mv tmp.pdf $output;
+#end if
+]]></command>
+    <inputs>
+        <expand macro="gff3_input" />
+        <expand macro="genome_selector" />
+
+        <param label="Minimum distance for SDs (bp)" name="sd_min" type="integer" value="5" />
+        <param label="Maximum distance for SDs (bp)" name="sd_max" type="integer" value="15" />
+
+        <param label="Minimum length of naively called ORFs in gaps (in AAs)" name="min_gene" type="integer" value="25" />
+
+        <param label="Excessive overlap distance (non-divergent, bp)" name="eod" type="integer" value="25" />
+        <param label="Excessive overlap distance (divergent, bp)" name="eodd" type="integer" value="50" />
+
+        <param label="Excessive gap distance (non-divergent, bp)" name="egd" type="integer" value="50" />
+        <param label="Excessive gap distance (divergent, bp)" name="egdd" type="integer" value="200" />
+
+        <param label="Report Format" type="select" name="report_format">
+            <option value="phageqc_report_full.html" selected="True">Full Report</option>
+            <option value="phageqc_report_464.html">464 Report</option>
+            <option value="phageqc_report_genomea.tex">GenomeA PDF Report</option>
+            <option value="phageqc_report_genomea.html">GenomeA HTML Report</option>
+        </param>
+    </inputs>
+    <outputs>
+      <data format="html" name="output">
+          <change_format>
+            <when input="report_format" value="phageqc_report_genomea.tex" format="pdf"/>
+          </change_format>
+      </data>
+      <data format="gff3" name="gff3" label="Phage QC annotation track"/>
+    </outputs>
+    <tests>
+      <test>
+        <param name="gff3_data" value="AY216660.gff3"/>
+        <param name="genome_fasta" value="AY216660.fasta" />
+	<output name="gff3" file="PhageQC_Out.gff3"/> 
+        <output name="output" file="PhageQC_Out.html"/>
+      </test>
+    </tests>    
+    <help><![CDATA[
+**What it does**
+
+Runs CPT's Phage Annotation Validator, which checks a phage genome annotation for the following issues:
+
+-  Missing RBSs
+-  Missing Gene Features
+-  Missing Product Tags
+-  Excessive Gaps
+-  Excessive Overlaps
+-  Morons
+-  Weird Start Codons
+-  Coding Density
+-  Incorrect gene model (when used with our Genbank Gene Model correction tool)
+
+        ]]></help>
+		<expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_phageqc_annotation/phageqc_report_464.html	Fri Jun 17 13:00:50 2022 +0000
@@ -0,0 +1,419 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
+    <meta name="description" content="">
+    <meta name="author" content="">
+    <title>[BICH464] Phage QC on {{record_name}} - {{score}}</title>
+    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css">
+    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap-theme.min.css">
+
+    <style type="text/css">
+/*
+ * Base structure
+ */
+
+/* Move down content because we have a fixed navbar that is 50px tall */
+body {
+  padding-top: 50px;
+}
+
+h3:before {
+  display: block;
+  content: " ";
+  margin-top: -50px;
+  height: 50px;
+  visibility: hidden;
+}
+
+/*
+ * Global add-ons
+ */
+
+.sub-header {
+  padding-bottom: 10px;
+  border-bottom: 1px solid #eee;
+}
+
+/*
+ * Top navigation
+ * Hide default border to remove 1px line.
+ */
+.navbar-fixed-top {
+  border: 0;
+}
+
+/*
+ * Sidebar
+ */
+
+/* Hide for mobile, show later */
+.sidebar {
+  display: none;
+}
+@media (min-width: 768px) {
+  .sidebar {
+    position: fixed;
+    top: 51px;
+    bottom: 0;
+    left: 0;
+    z-index: 1000;
+    display: block;
+    padding: 20px;
+    overflow-x: hidden;
+    overflow-y: auto; /* Scrollable contents if viewport is shorter than content. */
+    background-color: #f5f5f5;
+    border-right: 1px solid #eee;
+  }
+}
+
+/* Sidebar navigation */
+.nav-sidebar {
+  margin-right: -21px; /* 20px padding + 1px border */
+  margin-bottom: 20px;
+  margin-left: -20px;
+}
+.nav-sidebar > li > a {
+  padding-right: 20px;
+  padding-left: 20px;
+}
+.nav-sidebar > .active > a,
+.nav-sidebar > .active > a:hover,
+.nav-sidebar > .active > a:focus {
+  color: #fff;
+  background-color: #428bca;
+}
+
+
+/*
+ * Main content
+ */
+
+.main {
+  padding: 20px;
+}
+@media (min-width: 768px) {
+  .main {
+    padding-right: 40px;
+    padding-left: 40px;
+  }
+}
+.main .page-header {
+  margin-top: 0;
+}
+
+
+/*
+ * Placeholder dashboard ideas
+ */
+
+.placeholders {
+  margin-bottom: 30px;
+  text-align: center;
+}
+.placeholders h4 {
+  margin-bottom: 0;
+}
+.placeholder {
+  margin-bottom: 20px;
+}
+.placeholder img {
+  display: inline-block;
+  border-radius: 50%;
+}
+
+
+
+
+
+
+
+/* CUSTOM CSS */
+.spark {
+        position: relative;
+        margin:5px;
+}
+.spark span {
+        padding: 0px;
+        padding-left: 20px;
+        padding-right: 20px;
+        margin: 5px;
+        position: relative;
+}
+.spark .plus {
+        top: -10px;
+        background: #aaaaff;
+}
+.spark .minus {
+        top: 10px;
+        background: #ffaaaa;
+}
+.plus-focus {
+        top: -10px;
+        background: blue;
+}
+.minus-focus {
+        top: 10px;
+        background: red;
+}
+    </style>
+    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
+    <!--[if lt IE 9]>
+      <script src="//oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
+      <script src="//oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
+    <![endif]-->
+  </head>
+  <body>
+
+    <nav class="navbar navbar-inverse navbar-fixed-top">
+      <div class="container-fluid">
+        <div class="navbar-header">
+          <a class="navbar-brand" href="#">[BICH464] Phage QC on {{record_name}}</a>
+        </div>
+      </div>
+    </nav>
+
+    <div class="container-fluid">
+      <div class="row">
+        <div class="col-sm-3 col-md-2 sidebar">
+          <ul class="nav nav-sidebar">
+            <li><a href="#main"><b>Overview</b></a></li>
+            <li><a href="#bad_gene_starts"><b>Bad Gene Starts</b></a></li>
+            <li><a href="#missing_rbs"><small>Missing RBS</small></a></li>
+            <li><a href="#weird_starts"><small>Unusual Start Codons</small></a></li>
+            <li><a href="#excessive_gaps"><small>Excessive Gaps</small></a></li>
+            <li><a href="#excessive_overlap"><small>Excessive Overlaps</small></a></li>
+            <!--<li><a href="#coding_density"><small>Coding Density</small></a></li>-->
+            <li><a href="#gene_model"><b>Gene Model Issues</b></a></li>
+          </ul>
+        </div>
+        <div class="col-sm-9 col-sm-offset-3 col-md-10 col-md-offset-2 main" id="main">
+            <div class="jumbotron">
+                <div class="row">
+                    <div class="col-sm-7">
+                        <h1>Phage {{record_name}}</h1>
+                        <!--<h2>Score: {{ '%d' % ((gene_model_score + coding_density + excessive_overlap_score +  excessive_overlap_score + missing_rbs_score) / 5)}}</h2>-->
+                    </div>
+                    <!--<div class="col-sm-5">
+                        <table class="table table-striped">
+                            <thead>
+                                <tr>
+                                    <th>Section</th>
+                                    <th>Score</th>
+                                </tr>
+                            </thead>
+                            <tbody>
+                                <tr><td>Missing RBS</td><td>{{ '%d' % missing_rbs_score }}%</td></tr>
+                                <tr><td>Excessive Gaps</td><td>{{ '%d' % excessive_gap_score }}%</td></tr>
+                                <tr><td>Excessive Overlap</td><td>{{ '%d' % excessive_overlap_score }}%</td></tr>
+                                <tr><td>Coding Density Score</td><td>{{ '%d' % coding_density }}%</td></tr>
+                                <tr><td>Coding Density Real</td><td>{{ '%0.2f' % (100 * coding_density_exact) }}%</td></tr>
+                                <tr><td>Gene Model Issues</td><td>{{ '%d' % gene_model_score }}%</td></tr>
+                            </tbody>
+                        </table>
+                    </div>-->
+                </div>
+                <div class="row">
+                    <h3>Genome Overview</h3>
+                    <h4>Genes</h4>
+                    <ul>
+                        <li>Count: {{ genome_overview.genes.count }}</li>
+                        <li>Bases: {{ genome_overview.genes.bases }}</li>
+                        <li>Average Length: {{ genome_overview.genes.avg_len | round | int}}</li>
+                        <li>Coding Density: {{ '%0.2f' % (100 * coding_density_exact) }}%</li>
+                        <li>Composition
+                            <ul>
+                                <li>A {{ genome_overview.genes.comp.A }}</li>
+                                <li>C {{ genome_overview.genes.comp.C }}</li>
+                                <li>T {{ genome_overview.genes.comp.T }}</li>
+                                <li>G {{ genome_overview.genes.comp.G }}</li>
+                            </ul>
+                        </li>
+                    </ul>
+                    <h4>Overall</h4>
+                    <ul>
+                        <li>%GC: {{ '%0.2f' % (100 * genome_overview.overall.gc) }}%</li>
+                        <li>Composition
+                            <ul>
+                                <li>A {{ genome_overview.overall.comp.A }}</li>
+                                <li>C {{ genome_overview.overall.comp.C }}</li>
+                                <li>T {{ genome_overview.overall.comp.T }}</li>
+                                <li>G {{ genome_overview.overall.comp.G }}</li>
+                            </ul>
+                        </li>
+                    </ul>
+                </div>
+            </div>
+
+          <h2 class="sub-header" id="bad_gene_starts">Gene Starts</h2>
+          <h3 id="missing_rbs">Genes Missing RBS <small>{{missing_rbs_good}} / {{missing_rbs_good + missing_rbs_bad}}</small></h3>
+          <p>The following genes have issues with their RBS.</p>
+          {% if not rbss_annotated %}
+          <p>
+            Since you have not annotated any RBSs, this section does not count against your overall score.
+          </p>
+          {% endif %}
+          <div class="table-responsive">
+            <table class="table table-striped">
+              <thead>
+                <tr>
+                  <th>ID</th>
+                  <th>Location</th>
+                  <th>Error</th>
+                  <th>Upstream (-{{upstream_max}} .. -{{upstream_min}})</th>
+                </tr>
+              </thead>
+              <tbody>
+                {% for row in missing_rbs %}
+                {% if "None found" in row.__message %}
+                <tr>
+                    <td>{{row | nice_id | decode}}</td>
+                    <td>{{row.location.start}}..{{row.location.end}} [{{row.strand}}]</td>
+                    <td>None found</td>
+                    <td><span style="font-family:monospace">{{row.__upstream }}</span></td>
+                </tr>
+                {% endif %}
+                {% endfor %}
+              </tbody>
+            </table>
+          </div>
+
+          <h3 id="weird_starts">Start Codon Usage</h3>
+          <p>This section covers genes with unusual start codons</p>
+          <div class="table-responsive">
+            <table class="table table-striped">
+              <thead>
+                <tr>
+                  <th>Start Codon</th>
+                  <th>Count</th>
+                </tr>
+              </thead>
+              <tbody>
+                  {% for codon_key in weird_starts_overall_sorted_keys %}
+                  <tr><td>{{ codon_key }}</td><td>{{ weird_starts_overall[codon_key] }}</td></tr>
+                  {% endfor %}
+              </tbody>
+            </table>
+          </div>
+
+          <div class="table-responsive">
+            <table class="table table-striped">
+              <thead>
+                <tr>
+                  <th>ID</th>
+                  <th>Location</th>
+                  <th>Error</th>
+                </tr>
+              </thead>
+              <tbody>
+                {% for row in weird_starts %}
+                <tr>
+                    <td>{{row | nice_id| decode}}</td>
+                    <td>{{row.location.start}}..{{row.location.end}} [{{row.strand}}]</td>
+                    <td>{{row.qualifiers.get('note', [])}}</td>
+                </tr>
+                {% endfor %}
+              </tbody>
+            </table>
+          </div>
+
+          <h3 id="excessive_gaps">Intergenic Gaps</h3>
+          <p>Phage genomes are under pressure to maintain high coding density. Large intergenic gaps may be a sign of incorrect gene starts or missing genes.</p>
+          <div class="table-responsive">
+            <table class="table table-striped">
+              <thead>
+                <tr>
+                  <th>Region</th>
+                  <th>Size</th>
+                  <th>Bounding Gene Transcription Direction</th>
+                  <th>Message</th>
+                </tr>
+              </thead>
+              <tbody>
+                {% for row in excessive_gap %}
+                <tr>
+                    <td>{{row[0]}} .. {{row[1]}}</td>
+                    <td>{{row[1] - row[0]}}</td>
+                    <td>{{row[2] | nice_strand}} {{row[3] | nice_strand}}</td>
+                    <td>
+                        {% if row[4] == 0 %}
+                        {% else %}
+                        {{row[4]}} possible genes found in this region
+                        {% endif %}
+                    </td>
+                </tr>
+                {% endfor %}
+              </tbody>
+            </table>
+          </div>
+
+          <h3 id="excessive_overlap">Overlapping Genes</h3>
+          <p>Large gene overlaps may indicate an incorrect gene start or miscalled gene.</p>
+          <div class="table-responsive">
+            <table class="table table-striped">
+              <thead>
+                <tr>
+                  <th>Feature A</th>
+                  <th>Feature B</th>
+                  <th>Shared Region</th>
+                  <th>Overlap Length</th>
+                </tr>
+              </thead>
+              <tbody>
+                {% for row in excessive_overlap %}
+                <tr>
+                    <td>{{row[0] | nice_id | decode}} ({{row[0].location}})</td>
+                    <td>{{row[1] | nice_id | decode}} ({{row[1].location}})</td>
+                    <td>{{row[2]}}..{{row[3]}}</td>
+                    <td>{{row[3] - row[2]}}bp</td>
+                </tr>
+                {% endfor %}
+              </tbody>
+            </table>
+          </div>
+          <!--<h3 id="coding_density">Coding Density Issues <small>{{ coding_density }} / 100</small></h3>
+          <div class="table-responsive">
+            <p>
+            You have a coding density score of {{ coding_density_real }} which scores
+            {{ coding_density }} / 100. Most genomes should be in the 90% to
+            100% coding density range. Your exact coding density is {{ coding_density_exact }}.
+            </p>
+          </div>-->
+
+          <h2 class="sub-header" id="gene_model">Gene Model Issues</h2>
+          <p>These issues are mostly derived from how Apollo handles the gene model. <img src=""></p>
+          <div class="table-responsive">
+            <table class="table table-striped">
+              <thead>
+                <tr>
+                  <th>ID</th>
+                  <th>Exon</th>
+                  <th>CDS</th>
+                  <th>Message</th>
+                </tr>
+              </thead>
+              <tbody>
+                {% for row in gene_model %}
+                <tr>
+                    <td>{{row[0]}}</td>
+                    <td>{{row[1].location}}</td>
+                    <td>{{row[2].location}}</td>
+                    <td>{{row[3]}}</td>
+                </tr>
+                {% endfor %}
+              </tbody>
+            </table>
+          </div>
+
+        </div>
+      </div>
+    </div>
+
+
+    <script src="//ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script>
+  </body>
+</html>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_phageqc_annotation/phageqc_report_annotation_table.html	Fri Jun 17 13:00:50 2022 +0000
@@ -0,0 +1,244 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
+    <meta name="description" content="">
+    <meta name="author" content="">
+    <title>Annotation Table</title>
+    <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.12.2/jquery.min.js"></script>
+	<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap.min.css" crossorigin="anonymous">
+	<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/css/bootstrap-theme.min.css" crossorigin="anonymous">
+	<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.6/js/bootstrap.min.js" crossorigin="anonymous"></script>
+
+    <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.10.11/css/jquery.dataTables.css">
+    <script type="text/javascript" charset="utf8" src="https://cdn.datatables.net/1.10.11/js/jquery.dataTables.js"></script>
+    <style type="text/css">
+/*
+ * Base structure
+ */
+
+/* Move down content because we have a fixed navbar that is 50px tall */
+body {
+  padding-top: 50px;
+}
+
+h3:before {
+  display: block;
+  content: " ";
+  margin-top: -50px;
+  height: 50px;
+  visibility: hidden;
+}
+
+/*
+ * Global add-ons
+ */
+
+.sub-header {
+  padding-bottom: 10px;
+  border-bottom: 1px solid #eee;
+}
+
+/*
+ * Top navigation
+ * Hide default border to remove 1px line.
+ */
+.navbar-fixed-top {
+  border: 0;
+}
+
+/*
+ * Sidebar
+ */
+
+/* Hide for mobile, show later */
+.sidebar {
+  display: none;
+}
+@media (min-width: 768px) {
+  .sidebar {
+    position: fixed;
+    top: 51px;
+    bottom: 0;
+    left: 0;
+    z-index: 1000;
+    display: block;
+    padding: 20px;
+    overflow-x: hidden;
+    overflow-y: auto; /* Scrollable contents if viewport is shorter than content. */
+    background-color: #f5f5f5;
+    border-right: 1px solid #eee;
+  }
+}
+
+/* Sidebar navigation */
+.nav-sidebar {
+  margin-right: -21px; /* 20px padding + 1px border */
+  margin-bottom: 20px;
+  margin-left: -20px;
+}
+.nav-sidebar > li > a {
+  padding-right: 20px;
+  padding-left: 20px;
+}
+.nav-sidebar > .active > a,
+.nav-sidebar > .active > a:hover,
+.nav-sidebar > .active > a:focus {
+  color: #fff;
+  background-color: #428bca;
+}
+
+
+/*
+ * Main content
+ */
+
+.main {
+  padding: 20px;
+}
+@media (min-width: 768px) {
+  .main {
+    padding-right: 40px;
+    padding-left: 40px;
+  }
+}
+.main .page-header {
+  margin-top: 0;
+}
+
+
+/*
+ * Placeholder dashboard ideas
+ */
+
+.placeholders {
+  margin-bottom: 30px;
+  text-align: center;
+}
+.placeholders h4 {
+  margin-bottom: 0;
+}
+.placeholder {
+  margin-bottom: 20px;
+}
+.placeholder img {
+  display: inline-block;
+  border-radius: 50%;
+}
+
+td.moron {
+    font-size: 150%;
+    padding: 0px;
+    color: gray;
+}
+.strand_emph {
+    text-decoration: underline;
+    color: black;
+}
+
+    </style>
+    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
+    <!--[if lt IE 9]>
+      <script src="//oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
+      <script src="//oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
+    <![endif]-->
+  </head>
+  <body>
+
+    <nav class="navbar navbar-inverse navbar-fixed-top">
+      <div class="container-fluid">
+        <div class="navbar-header">
+            <a class="navbar-brand" href="#">Annotation Table</a>
+        </div>
+      </div>
+    </nav>
+
+    <div class="container-fluid">
+      <div class="row">
+        <div class="col-md-12 main" id="main">
+            <!-- Tab strip: an Overview tab followed by one tab per record. Tab
+                 targets and table ids are built from the loop index rather than
+                 the record id: Bootstrap resolves each href as a jQuery selector,
+                 so characters such as "." or ":" in a record id would break the
+                 lookup, and element ids must be unique within the page. -->
+            <ul class="nav nav-tabs" role="tablist" id="myTabs">
+                <li role="presentation" class="active"><a href="#overview" role="tab" data-toggle="tab">Overview</a></li>
+                {% for (record, data) in annotation_table_data %}
+                <li role="presentation"><a href="#record-{{ loop.index }}" role="tab" data-toggle="tab">{{ record.id }}</a></li>
+                {% endfor %}
+            </ul>
+
+            <!-- Tab panes: each cell is printed as-is when it is a string or a
+                 scalar, and as a bullet list when it is any other iterable. -->
+            <div class="tab-content">
+                <div role="tabpanel" class="tab-pane active" id="overview">
+                    Data on each organism will be accessible from the tabs above.
+                </div>
+                {% for (record, data) in annotation_table_data %}
+                    <div role="tabpanel" class="tab-pane" id="record-{{ loop.index }}">
+                        <table class="table table-striped" id="data-{{ loop.index }}">
+                            <thead>
+                                <tr>
+                                {% for col in annotation_table_col_names %}
+                                    <th>{{ col[0] }}</th>
+                                {% endfor %}
+                                </tr>
+                            </thead>
+                            <tbody>
+                                {% for row in data %}
+                                    <tr>
+                                    {% for col in row %}
+                                    <td>{% if col is iterable and col is not string %}<ul>{% for val in col %}<li>{{ val }}</li>{% endfor %}</ul>{% else %}{{ col }}{% endif %}</td>
+                                    {% endfor %}
+                                    </tr>
+                                {% endfor %}
+                            </tbody>
+                        </table>
+                    </div>
+                {% endfor %}
+            </div>
+        </div>
+      </div>
+    </div>
+
+    <script type="text/javascript">
+        $(document).ready( function () {
+
+
+
+$('#myTabs a').click(function (e) {
+	e.preventDefault()
+	$(this).tab('show')
+})
+
+
+
+jQuery.fn.dataTable.ext.type.detect.unshift( function ( data ) {
+    if ( typeof data !== 'string' ) {
+        return null;
+    }
+
+    var matches = data.match(/^(\d+)\.\.(\d+)/);
+    return matches ? 'genomic' : null;
+} );
+
+
+
+jQuery.extend( jQuery.fn.dataTableExt.oSort, {
+    "genomic-pre": function ( a ) {
+        var matches = a.match(/^(\d+)\.\.(\d+)/);
+        console.log(a + " " + matches[1]);
+        return parseInt(matches[1]);
+    },
+
+    "genomic-asc": function ( a, b ) {
+        return ((a < b) ? -1 : ((a > b) ? 1 : 0));
+    },
+
+    "genomic-desc": function ( a, b ) {
+        return ((a < b) ? 1 : ((a > b) ? -1 : 0));
+    }
+} );
+
+            $('table').DataTable();
+        });
+    </script>
+  </body>
+</html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_phageqc_annotation/phageqc_report_full.html	Fri Jun 17 13:00:50 2022 +0000
@@ -0,0 +1,412 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
+    <meta name="description" content="">
+    <meta name="author" content="">
+    <title>Phage QC on {{record_name}} - {{score}}</title>
+    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css">
+    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap-theme.min.css">
+
+    <style type="text/css">
+/*
+ * Base structure
+ */
+
+/* Move down content because we have a fixed navbar that is 50px tall */
+body {
+  padding-top: 50px;
+}
+
+h3:before {
+  display: block;
+  content: " ";
+  margin-top: -50px;
+  height: 50px;
+  visibility: hidden;
+}
+
+/*
+ * Global add-ons
+ */
+
+.sub-header {
+  padding-bottom: 10px;
+  border-bottom: 1px solid #eee;
+}
+
+/*
+ * Top navigation
+ * Hide default border to remove 1px line.
+ */
+.navbar-fixed-top {
+  border: 0;
+}
+
+/*
+ * Sidebar
+ */
+
+/* Hide for mobile, show later */
+.sidebar {
+  display: none;
+}
+@media (min-width: 768px) {
+  .sidebar {
+    position: fixed;
+    top: 51px;
+    bottom: 0;
+    left: 0;
+    z-index: 1000;
+    display: block;
+    padding: 20px;
+    overflow-x: hidden;
+    overflow-y: auto; /* Scrollable contents if viewport is shorter than content. */
+    background-color: #f5f5f5;
+    border-right: 1px solid #eee;
+  }
+}
+
+/* Sidebar navigation */
+.nav-sidebar {
+  margin-right: -21px; /* 20px padding + 1px border */
+  margin-bottom: 20px;
+  margin-left: -20px;
+}
+.nav-sidebar > li > a {
+  padding-right: 20px;
+  padding-left: 20px;
+}
+.nav-sidebar > .active > a,
+.nav-sidebar > .active > a:hover,
+.nav-sidebar > .active > a:focus {
+  color: #fff;
+  background-color: #428bca;
+}
+
+
+/*
+ * Main content
+ */
+
+.main {
+  padding: 20px;
+}
+@media (min-width: 768px) {
+  .main {
+    padding-right: 40px;
+    padding-left: 40px;
+  }
+}
+.main .page-header {
+  margin-top: 0;
+}
+
+
+/*
+ * Placeholder dashboard ideas
+ */
+
+.placeholders {
+  margin-bottom: 30px;
+  text-align: center;
+}
+.placeholders h4 {
+  margin-bottom: 0;
+}
+.placeholder {
+  margin-bottom: 20px;
+}
+.placeholder img {
+  display: inline-block;
+  border-radius: 50%;
+}
+
+td.moron {
+    font-size: 150%;
+    padding: 0px;
+    color: gray;
+}
+.strand_emph {
+    text-decoration: underline;
+    color: black;
+}
+
+    </style>
+    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
+    <!--[if lt IE 9]>
+      <script src="//oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
+      <script src="//oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
+    <![endif]-->
+  </head>
+  <body>
+
+    <nav class="navbar navbar-inverse navbar-fixed-top">
+      <div class="container-fluid">
+        <div class="navbar-header">
+          <a class="navbar-brand" href="#">Phage QC on {{record_name}}</a>
+        </div>
+      </div>
+    </nav>
+
+    <div class="container-fluid">
+      <div class="row">
+        <div class="col-sm-3 col-md-2 sidebar">
+          <ul class="nav nav-sidebar">
+            <li><a href="#main"><b>Overview</b></a></li>
+            <li><a href="#bad_gene_starts"><b>Bad Gene Starts</b></a></li>
+            <li><a href="#missing_rbs"><small>Missing RBS</small></a></li>
+            <li><a href="#weird_starts"><small>Unusual Start Codons</small></a></li>
+            <li><a href="#excessive_gaps"><small>Excessive Gaps</small></a></li>
+            <li><a href="#excessive_overlap"><small>Excessive Overlaps</small></a></li>
+            <!--<li><a href="#coding_density"><small>Coding Density</small></a></li>-->
+            <li><a href="#antisense"><b>Antisense Genes</b></a></li>
+            <li><a href="#morons"><small>Possible Morons</small></a></li>
+            <li><a href="#annotations"><b>Annotation Issues</b></a></li>
+            <li><a href="#missing_product"><small>Missing Product Tags</small></a></li>
+          </ul>
+        </div>
+        <div class="col-sm-9 col-sm-offset-3 col-md-10 col-md-offset-2 main" id="main">
+            <div class="jumbotron">
+                <div class="row">
+                    <div class="col-sm-7">
+                        <h1>Phage {{record_name}}</h1>
+                        <!--<h2>Score: {{score}}/100</h2>-->
+                    </div>
+                    <!-- NOTE: the score table below is hidden from the browser only. The
+                         template engine still renders every expression inside the HTML
+                         comment, so the percentage rows guard against a zero denominator. -->
+                    <!--<div class="col-sm-5">
+                        <table class="table table-striped">
+                            <thead>
+                                <tr>
+                                    <th>Section</th>
+                                    <th>Score</th>
+                                </tr>
+                            </thead>
+                            <tbody>
+                                <tr><td>Missing RBS</td><td>{{ '%d' % missing_rbs_score }}%</td></tr>
+                                <tr><td>Excessive Gaps</td><td>{{ '%d' % excessive_gap_score }}%</td></tr>
+                                <tr><td>Excessive Overlap</td><td>{{ '%d' % excessive_overlap_score }}%</td></tr>
+                                <tr><td>Coding Density Score</td><td>{{ '%d' % coding_density }}%</td></tr>
+                                <tr><td>Possible Morons</td><td>{{ '%d' % ((100 * morons_good / (morons_good + morons_bad)) if (morons_good + morons_bad) else 0) }}%</td></tr>
+                                <tr><td>Missing Product Tags</td><td>{{ '%d' % ((100 * missing_tags_good / (missing_tags_good + missing_tags_bad)) if (missing_tags_good + missing_tags_bad) else 0) }}%</td></tr>
+                            </tbody>
+                        </table>
+                    </div>-->
+                </div>
+            </div>
+
+          <h2 class="sub-header" id="bad_gene_starts">Gene Starts</h2>
+          <h3 id="missing_rbs">Genes missing RBS <small>{{missing_rbs_good}} / {{missing_rbs_good + missing_rbs_bad}}</small></h3>
+          <p>The following genes have issues with their RBS.</p>
+          {% if not rbss_annotated %}
+          <p>
+            Since you have not annotated any possible RBSs, this does not count off from your overall score.
+          </p>
+          {% endif %}
+          <div class="table-responsive">
+            <table class="table table-striped">
+              <thead>
+                <tr>
+                  <th>Feature Type</th>
+                  <th>ID</th>
+                  <th>Location</th>
+                  <th>Error</th>
+                  <th>Upstream (-{{upstream_max}} .. -{{upstream_min}})</th>
+                </tr>
+              </thead>
+              <tbody>
+                {% for row in missing_rbs %}
+                <tr>
+                    <td>{{row.type}}</td>
+                    <td>{{row.id}}</td>
+                    <td>{{row.location.start}}..{{row.location.end}} [{{row.strand}}]</td>
+                    <td>{{row.__message}}</td>
+                    <td><span style="font-family:monospace">{{row.__upstream}}</span></td>
+                </tr>
+                {% endfor %}
+              </tbody>
+            </table>
+          </div>
+
+          <h3 id="weird_starts">Start Codon Usage</h3>
+          <p>This section covers genes with unusual start codons</p>
+          <div class="table-responsive">
+            <table class="table table-striped">
+              <thead>
+                <tr>
+                  <th>Start Codon</th>
+                  <th>Count</th>
+                </tr>
+              </thead>
+              <tbody>
+                  {% for codon_key in weird_starts_overall_sorted_keys %}
+                  <tr><td>{{ codon_key }}</td><td>{{ weird_starts_overall[codon_key] }}</td></tr>
+                  {% endfor %}
+              </tbody>
+            </table>
+          </div>
+
+          <div class="table-responsive">
+            <table class="table table-striped">
+              <thead>
+                <tr>
+                  <th>Feature Type</th>
+                  <th>ID</th>
+                  <th>Location</th>
+                  <th>Error</th>
+                </tr>
+              </thead>
+              <tbody>
+                {% for row in weird_starts %}
+                <tr>
+                    <td>{{row.type}}</td>
+                    <td>{{row.id}}</td>
+                    <td>{{row.location.start}}..{{row.location.end}} [{{row.strand}}]</td>
+                    <td>{{row.__error}}</td>
+                </tr>
+                {% endfor %}
+              </tbody>
+            </table>
+          </div>
+
+          <h3 id="excessive_gaps">Intergenic Gaps</h3>
+          <p>Phage genomes are under pressure to maintain high coding density. Large intergenic gaps may be a sign of incorrect gene starts or missing genes.</p>
+          <div class="table-responsive">
+            <table class="table table-striped">
+              <thead>
+                <tr>
+                  <th>Region</th>
+                  <th>Size</th>
+                  <th>Bounding Gene Transcription Direction</th>
+                  <th>Message</th>
+                </tr>
+              </thead>
+              <tbody>
+                {% for row in excessive_gap %}
+                <tr>
+                    <td>{{row[0]}} .. {{row[1]}}</td>
+                    <td>{{row[1] - row[0]}}</td>
+                    <td>{{row[2] | nice_strand}} {{row[3] | nice_strand}}</td>
+                    <td>
+                        {% if row[4] == 0 %}
+                        {% else %}
+                        {{row[4]}} ORFs found in this region
+                        {% endif %}
+                    </td>
+                </tr>
+                {% endfor %}
+              </tbody>
+            </table>
+          </div>
+
+          <h3 id="excessive_overlap">Overlapping Genes </h3>
+          <p>Large gene overlaps may indicate an incorrect gene start or miscalled gene.</p>
+          <div class="table-responsive">
+            <table class="table table-striped">
+              <thead>
+                <tr>
+                  <th>Feature A</th>
+                  <th>Feature B</th>
+                  <th>Shared Region</th>
+                  <th>Overlap Length</th>
+                </tr>
+              </thead>
+              <tbody>
+                {% for row in excessive_overlap %}
+                <tr>
+                    <td>{{row[0].id}} ({{row[0].location}})</td>
+                    <td>{{row[1].id}} ({{row[1].location}})</td>
+                    <td>{{row[2]}}..{{row[3]}}</td>
+                    <td>{{row[3] - row[2]}} bp</td>
+                </tr>
+                {% endfor %}
+              </tbody>
+            </table>
+          </div>
+          <!--<h3 id="coding_density">Coding Density Issues <small>{{ coding_density }} / 100</small></h3>
+          <div class="table-responsive">
+            <p>
+            You have a coding density of {{ coding_density_real }} which scores
+            {{ coding_density }} / 100. Most genomes should be in the 90% to
+            100% coding density range
+            </p>
+          </div>-->
+
+          <h2 class="sub-header" id="antisense">Antisense Genes</h2>
+          <h3 id="morons">Possible Morons <small>{{morons_good}} / {{morons_good + morons_bad}} (Doesn't count towards score)</small></h3>
+          <div class="table-responsive">
+            <table class="table table-striped">
+              <thead>
+                <tr>
+                  <th>Feature</th>
+                  <th>RBS</th>
+                  <th>Surrounding Features</th>
+                </tr>
+              </thead>
+              <tbody>
+                {% for row in morons %}
+                <tr>
+                    <td>{{row[0].id}}</td>
+                    <td>{{row[3]}}</td>
+                    <td class="moron">
+                        {% for x in row[1] %}
+                            {{ x | nice_strand }}
+                        {% endfor %}
+                        <span class="strand_emph">{{ row[0].strand | nice_strand }}</span>
+                        {% for x in row[2] %}
+                            {{ x | nice_strand }}
+                        {% endfor %}
+                    </td>
+                </tr>
+                {% endfor %}
+              </tbody>
+            </table>
+          </div>
+
+
+          <h2 class="sub-header" id="annotations">Annotation Issues</h2>
+          <h3 id="missing_product">Missing Product Tags <small>{{missing_tags_good}} / {{missing_tags_good + missing_tags_bad}}</small></h3>
+          <div class="table-responsive">
+            <table class="table table-striped">
+              <thead>
+                <tr>
+                  <th>Feature</th>
+                  <th>Qualifiers</th>
+                </tr>
+              </thead>
+              <tbody>
+                {% for row in missing_tags %}
+                <tr>
+                    <td>{{row.id}}</td>
+                    <td>
+                        {% for key in row.qualifiers %}
+                        {{ key }}
+                        <ul>
+                            {% for value in row.qualifiers[key] %}
+                            <li>{{value}}</li>
+                            {% endfor %}
+                        </ul>
+                        {% endfor %}
+                    </td>
+                </tr>
+                {% endfor %}
+              </tbody>
+            </table>
+          </div>
+
+
+
+
+
+        </div>
+      </div>
+    </div>
+
+
+    <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script>
+  </body>
+</html>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_phageqc_annotation/phageqc_report_genomea.tex	Fri Jun 17 13:00:50 2022 +0000
@@ -0,0 +1,235 @@
+\documentclass[]{article}
+\usepackage{lmodern}
+\usepackage{amssymb,amsmath}
+\usepackage{ifxetex,ifluatex}
+\usepackage{fixltx2e} % provides \textsubscript
+\usepackage[T1]{fontenc}
+\usepackage[utf8]{inputenc}
+
+\addtolength{\oddsidemargin}{-.875in}
+\addtolength{\evensidemargin}{-.875in}
+\addtolength{\textwidth}{1.75in}
+
+\addtolength{\topmargin}{-.875in}
+\addtolength{\textheight}{1.75in}
+
+\usepackage{fancyhdr}
+\pagestyle{fancy}
+\lhead{GenomeA Compliance Report}
+\chead{}
+\rhead{ {{record_name | texify}} }
+\lfoot{}
+\cfoot{\thepage}
+\rfoot{}
+
+
+
+\usepackage{microtype}
+\usepackage{hyperref}
+\hypersetup{unicode=true,
+            pdfborder={0 0 0},
+            breaklinks=true}
+\urlstyle{same}  % don't use monospace font for urls
+\usepackage{longtable,booktabs}
+\date{Compiled \today}
+\title{GenomeA Compliance Report for {{record_nice_name | texify}}}
+
+\begin{document}
+%\pagestyle{plain}
+\maketitle
+This report details possible issues with your submitted genome annotations.
+
+\section{Required Changes}
+
+The changes detailed in this section are required for acceptance of your
+submission.
+
+\subsection{Missing Gene Features}
+
+These coding sequences (``CDS'' in your GenBank file) are missing the
+associated gene feature (``gene''). This is required for validation by NCBI's
+rules which are encoded in the sequin and tbl2asn programs.
+{%if missing_genes_bad > 0 %}
+
+{{ missing_genes_bad }} out of {{ missing_genes_good + missing_genes_bad
+}} features are lacking their associated gene feature.
+
+\begin{longtable}{ll}
+\hline
+Feature ID & Location\\
+\hline
+\endhead
+{% for row in missing_genes %}
+{{ row.id | texify }} & \texttt{{'{'}}{{row.location}}{{'}'}}\tabularnewline
+{% endfor %}
+\end{longtable}
+{% else %}
+You are not missing any gene features.
+{% endif %}
+
+\subsection{Missing Product Tags}\label{missing-product-tags}
+
+{{missing_tags_good}} out of {{missing_tags_good + missing_tags_bad}} features have product tags (\texttt{/product="..."}).
+{% if missing_tags_bad > 0 %}
+The following features are missing product tags
+\begin{longtable}{ll}
+\hline
+Feature & Location\\
+\hline
+\endhead
+{% for row in missing_tags %}
+{{ row.id | texify }} & \texttt{{'{'}}{{row.location}}{{'}'}}\tabularnewline
+{% endfor %}
+\end{longtable}
+{% endif %}
+
+\subsection{Missing Locus Tags}\label{missing-locus-tags}
+
+{{gene_model_correction_good}} out of {{gene_model_correction_good + gene_model_correction_bad}} features have valid locus tags (\texttt{/locus\_tag="..."}).
+{% if gene_model_correction_bad > 0 %}
+The following features have issues with their locus tags
+\begin{longtable}{lllll}
+\hline
+ID & Location & Gene Locus Tag & CDS Locus Tag & Issue \\
+\hline
+\endhead
+{% for row in gene_model_correction %}
+{{ row[0].id | texify }} & \texttt{{'{'}}{{row[1].location}}{{'}'}} & {{ row[0].qualifiers['locus_tag'][0] | texify }} & {{ row[1].qualifiers['locus_tag'][0] | texify }} & {{ row[2] | texify }}\tabularnewline
+{% endfor %}
+\end{longtable}
+{% endif %}
+
+
+\section{Suggested Changes}\label{suggested-changes}
+
+These changes are not required, but are strongly encouraged in order to
+provide a uniform genome annotation within the phage community.
+
+\subsection{Start Codons}\label{start-codons}
+Nearly all phage genes use ATG, GTG or TTG as start codons. The start codon distribution is as
+follows:
+
+
+\begin{longtable}{lll}
+\hline
+Start Codon & Count\\
+\hline
+\endhead
+{% for codon_key in weird_starts_overall_sorted_keys %}
+{{ codon_key }} & {{ weird_starts_overall[codon_key] }} \\
+{% endfor %}
+\end{longtable}
+
+{% if weird_starts_bad != 0 %}
+There are {{weird_starts_bad }} unusual start codons in the genome; these
+should be carefully justified. If there is evidence for these starts, the
+GenomeA text should note this.
+
+\begin{longtable}{lll}
+\hline
+Feature ID & Location & Start Codon\\
+\hline
+\endhead
+{% for row in weird_starts %}
+{{ row.id | texify }} & \texttt{{'{'}}{{row.location}}{{'}'}} & {{row.__start}} \\
+{% endfor %}
+\end{longtable}
+
+{% endif %}
+
+\subsection{Unannotated RBSs}\label{unannotated-rbss}
+
+The following CDSs either do not have a detectable ribosome binding site (RBS;
+Shine-Dalgarno sequence), in which case there is a strong possibility that
+this is not the correct start, or there is one but it is not annotated.
+Annotating the RBS as part of the gene feature is the best practice.
+
+\begin{longtable}{lllll}
+\hline
+ID & Location & Error & Upstream (-{{upstream_max}} .. -{{upstream_min}})\\
+\hline
+\endhead
+{% for row in missing_rbs %}
+{% if 'Unannotated' not in row.__message%}
+{{ row.id | texify }} & \texttt{{'{'}}{{row.location}}{{'}'}} & {{row.__message | texify}} & \texttt{{'{'}}{{row.__upstream}}{{'}'}} \\
+{% endif %}
+{% endfor %}
+{% for row in missing_rbs %}
+{% if 'Unannotated' in row.__message%}
+{{ row.id | texify }} & \texttt{{'{'}}{{row.location}}{{'}'}} & {{row.__message | texify}} & \texttt{{'{'}}{{row.__upstream}}{{'}'}} \\
+{% endif %}
+{% endfor %}
+\end{longtable}
+
+\section{Areas for Further Examination}\label{notes}
+
+These areas may be indicative of a problem, or may simply be
+informational. You should examine the areas mentioned in detail to ensure
+that the annotations are valid and that no genes are missed.
+
+
+
+
+
+\subsection{Unusual Gaps}\label{excessive-gaps}
+
+{% if excessive_gap | length == 0 %}
+No gaps over {{ params['excessive_gap_dist'] }} nt (for genes on the same
+strand) or {{ params['excessive_gap_divergent_dist'] }} nt (for genes on
+opposite strands) were found.
+{% else %}
+Gaps over {{ params['excessive_gap_dist'] }} nt (for genes on the same
+strand) or {{ params['excessive_gap_divergent_dist'] }} nt (for genes on
+opposite strands) were found.
+
+\begin{longtable}{llll}
+\hline
+Region & Size & Surroundings & Messages\\
+\hline
+\endhead
+{% for row in excessive_gap %}
+\texttt{{'{'}}{{row[0]}}..{{row[1]}}{{'}'}} & {{row[1] - row[0]}} & {{row[2] | nice_strand_tex}} {{row[3] | nice_strand_tex}} &  {% if row[4] != 0 %}{{row[4]}} ORFs found in this region{% endif %} \\
+
+{% endfor %}
+\end{longtable}
+{% endif %}
+
+
+
+
+\subsection{Unusual Overlaps}\label{excessive-overlaps}
+
+{% if excessive_overlap | length == 0 %}
+No overlaps over {{ params['excessive_overlap_dist'] }} nt (for genes on the same
+strand) or {{ params['excessive_overlap_divergent_dist'] }} nt (for genes on
+opposite strands) were found.
+{% else %}
+Overlaps over {{ params['excessive_overlap_dist'] }} nt (for genes on the same
+strand) or {{ params['excessive_overlap_divergent_dist'] }} nt (for genes on
+opposite strands) were found.
+\begin{longtable}{llllll}
+\hline
+\multicolumn{2}{l}{Feature A} & \multicolumn{2}{l}{Feature B} & & \\
+ID & Location & ID & Location & Region & Length\\
+\hline
+\endhead
+{% for row in excessive_overlap %}
+{{row[0].id | texify}} & \texttt{{'{'}}{{row[0].location}}{{'}'}} & {{row[1].id | texify}} & \texttt{{'{'}}{{row[1].location}}{{'}'}} & {{row[2]}}..{{row[3]}} & {{row[3] - row[2]}} \\
+{% endfor %}
+\end{longtable}
+{% endif %}
+
+\subsection{Coding Density}\label{coding-density}
+
+You have a coding density of {{ coding_density_real }}\% which scores
+{{ coding_density }} / 100 on our scale. Most genomes should be in the 90\% to 100\%
+coding density range.
+
+
+
+
+
+
+
+
+\end{document}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_phageqc_annotation/shinefind.py	Fri Jun 17 13:00:50 2022 +0000
@@ -0,0 +1,420 @@
+#!/usr/bin/env python
+import re
+import sys
+import argparse
+import logging
+from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature
+from Bio import SeqIO
+from Bio.SeqRecord import SeqRecord
+from Bio.SeqFeature import FeatureLocation
+from gff3 import (
+    feature_lambda,
+    feature_test_type,
+    feature_test_true,
+    feature_test_quals,
+    get_id,
+    ensure_location_in_bounds,
+)
+
+logging.basicConfig(level=logging.INFO)  # INFO threshold: the log.debug() calls in this file are not emitted
+log = logging.getLogger()  # no name given, so this is the root logger
+
+
+class NaiveSDCaller(object):
+    # Regex-based caller that lists candidate Shine-Dalgarno (SD) motifs in the window upstream of a feature.
+    # TODO May make switch for different sequence sets
+    SD_SEQUENCES = (  # motifs searched for, listed longest (7 nt) to shortest (3 nt)
+        "AGGAGGT",
+        "GGAGGT",
+        "AGGAGG",
+        "GGGGGG",
+        "AGGAG",
+        "GAGGT",
+        "GGAGG",
+        "GGGGG",
+        "AGGT",
+        "GGGT",
+        "GAGG",
+        "GGGG",
+        "AGGA",
+        "GGAG",
+        "GGA",
+        "GAG",
+        "AGG",
+        "GGT",
+        "GGG",
+    )
+
+    def __init__(self):
+        self.sd_reg = [re.compile(x, re.IGNORECASE) for x in self.SD_SEQUENCES]  # one case-insensitive regex per motif
+
+    def list_sds(self, sequence, sd_min=3, sd_max=17):  # returns a sorted list of hit dicts for all motif matches in `sequence`
+        hits = []
+        for regex in self.sd_reg:
+            for match in regex.finditer(sequence):
+                spacing = len(sequence) - len(match.group()) - match.start()  # bases between the match end and the end of `sequence`
+                if sd_max >= spacing+sd_min and spacing+sd_min >= sd_min:
+                    # Keep the hit only when the gap (spacing + sd_min) does not exceed sd_max.
+                    # The window built by testFeatureUpstream stops sd_min bases short of the feature, hence gap = spacing + sd_min.
+                    # spacing is never negative (a match cannot end past the sequence), so the second comparison always holds.
+                    hits.append(
+                        {
+                            "spacing": spacing,
+                            "hit": match.group(),  # matched text, in the case it has in `sequence`
+                            "start": match.start(),  # offsets are relative to `sequence`, not the genome
+                            "end": match.end(),
+                            "len": len(match.group()),
+                        }
+                    )
+        hits = sorted(hits, key= lambda x: (-x['len'],x['spacing']))  # longest motif first, then smallest spacing
+        return hits
+
+    @classmethod
+    def highlight_sd(cls, sequence, start, end):  # lower-case the flanks, upper-case sequence[start:end], join the three parts with spaces
+        return " ".join(
+            [
+                sequence[0:start].lower(),
+                sequence[start:end].upper(),
+                sequence[end:].lower(),
+            ]
+        )
+
+    @classmethod
+    def to_features(cls, hits, strand, parent_start, parent_end, feature_id=None, sd_min=3, sd_max=17):  # one Shine_Dalgarno_sequence feature per hit
+        results = []
+        for idx, hit in enumerate(hits):
+            # gene            complement(124..486)
+            # -1      491     501     0       5       5
+            # -1      491     501     0       4       5
+            # -1      491     501     1       4       5
+            # -1      491     501     2       3       5
+            # -1      491     501     1       3       5
+            # -1      491     501     0       3       5
+            
+            qualifiers = {
+                "source": "CPT_ShineFind",
+                "ID": "%s.rbs-%s" % (feature_id, idx),  # idx is the hit's position in `hits`
+            }
+
+            if strand > 0:
+                start = parent_end - hit["spacing"] - hit["len"]  # spacing is counted back from parent_end
+                end = parent_end - hit["spacing"]
+            else:
+                start = parent_start + hit["spacing"]  # spacing is counted forward from parent_start
+                end = parent_start + hit["spacing"] + hit["len"]
+            # NOTE(review): no min/max gap check is actually performed here; sd_min and sd_max are unused in this method.
+
+            # gap is either the sd_start-cds_end (neg strand) or the sd_end-cds_start (pos strand)
+            # minimum absolute value of these two will be the proper gap regardless of strand
+            tmp = gffSeqFeature(
+                FeatureLocation(min(start, end), max(start, end), strand=strand),
+                # min/max guard against start > end when building the location
+                type="Shine_Dalgarno_sequence",
+                qualifiers=qualifiers,
+            )
+            results.append(tmp)
+        return results
+
+    def testFeatureUpstream(self, feature, record, sd_min=3, sd_max=17):  # returns (hits, start, end, seq) for the upstream window
+        # Strand information necessary to getting correct upstream sequence
+        strand = feature.location.strand
+
+        # Window spans sd_min .. sd_max + 7 bases upstream of the feature; 7 is the length of the longest motif in SD_SEQUENCES.
+        if strand > 0:
+            start = feature.location.start - sd_max - 7
+            end = feature.location.start - sd_min
+        else:
+            start = feature.location.end + sd_min  # any strand that is not > 0 looks past location.end
+            end = feature.location.end + sd_max + 7
+
+        (start, end) = ensure_location_in_bounds(
+            start=start, end=end, parent_length=len(record)
+        )
+
+        # Create our temp feature used to obtain correct portion of
+        # genome
+        tmp = gffSeqFeature(FeatureLocation(min(start, end), max(start, end), strand=strand), type="domain")
+        seq = str(tmp.extract(record.seq))  # presumably strand-aware (reverse complement on the minus strand) - confirm
+        return self.list_sds(seq, sd_min, sd_max), start, end, seq
+
+    def hasSd(self, feature, record, sd_min=3, sd_max=17):  # True when testFeatureUpstream finds at least one hit
+        sds, start, end, seq = self.testFeatureUpstream(
+            feature, record, sd_min=sd_min, sd_max=sd_max
+        )
+        return len(sds) > 0
+
+
+# Cycle through everything feature_lambda yields for this feature (subfeatures=True)
+# and return the smallest start and largest end seen; (None, None) if nothing is yielded.
+# Used by fix_gene_boundaries. Remove pending bugfix for feature display in Apollo.
+def fminmax(feature):
+    fmin = None
+    fmax = None
+    for sf in feature_lambda([feature], feature_test_true, {}, subfeatures=True):
+        if fmin is None:  # first feature seen seeds both bounds
+            fmin = sf.location.start
+            fmax = sf.location.end
+        if sf.location.start < fmin:
+            fmin = sf.location.start
+        if sf.location.end > fmax:
+            fmax = sf.location.end
+    return fmin, fmax
+
+
+def fix_gene_boundaries(feature):  # replaces feature.location in place and returns the same feature
+    # There is a bug in Apollo whereby we have created gene
+    # features which are larger than expected, but we cannot see this.
+    # We only see a perfect sized gene + SD together.
+    #
+    # So, we clamp the location of the feature to the span that
+    # fminmax() reports for it. Will remove pending Apollo upgrade.
+    fmin, fmax = fminmax(feature)
+    if feature.location.strand > 0:
+        feature.location = FeatureLocation(fmin, fmax, strand=1)
+    else:
+        feature.location = FeatureLocation(fmin, fmax, strand=-1)  # any strand that is not > 0 is written as -1
+    return feature
+
+def shinefind(  # find SD candidates upstream of each gene's first CDS; writes a table, an SD-only GFF3, and the full record to stdout
+    fasta,
+    gff3,
+    gff3_output=None,
+    table_output=None,
+    lookahead_min=3,
+    lookahead_max=17,
+    top_only=False,
+    add=False,
+):
+    table_output.write(  # NOTE(review): table_output defaults to None but is written to unconditionally
+        "\t".join(
+            [
+                "ID",
+                "Name",
+                "Terminus",
+                "Terminus",  # rows put feature.location.start in the first Terminus column and .end in this one
+                "Strand",
+                "Upstream Sequence",
+                "SD",
+                "Spacing",
+            ]
+        )
+        + "\n"
+    )
+
+    sd_finder = NaiveSDCaller()
+    # Load up sequence(s) for GFF3 data
+    seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))
+    # Parse GFF3 records
+    for record in gffParse(gff3, base_dict=seq_dict):
+        # Separate record that collects only the SD features written to gff3_output.
+        gff3_output_record = SeqRecord(record.seq, record.id)
+        # Filter out just coding sequences
+        ignored_features = []  # NOTE(review): appended to below but never read in this function
+        for x in record.features:
+            # If feature X does NOT contain a CDS, add it to the ignored_features
+            # list. This means a top level gene feature is caught here
+            # whenever it has no CDS subfeature.
+            if (
+                len(
+                    list(
+                        feature_lambda(
+                            [x], feature_test_type, {"type": "CDS"}, subfeatures=True
+                        )
+                    )
+                )
+                == 0
+            ):
+                ignored_features.append(x)
+
+        # Loop over all gene features
+        for gene in feature_lambda(
+            record.features, feature_test_type, {"type": "gene"}, subfeatures=True
+        ):
+
+            # Get the CDSs from this gene, ordered by start coordinate.
+            feature = sorted(
+                list(
+                    feature_lambda(
+                        gene.sub_features,
+                        feature_test_type,
+                        {"type": "CDS"},
+                        subfeatures=True,
+                    )
+                ),
+                key=lambda x: x.location.start,
+            )
+            # If no CDSs are in this gene feature, skip this gene
+            if len(feature) == 0:
+                # We've already caught these above in our ignored_features
+                # list, so we skip out on the rest of this for loop
+                continue
+            else:
+                # Otherwise take the CDS with the lowest start coordinate.
+                feature = feature[0]
+
+            # Three different ways RBSs can be stored that we expect.
+            rbs_rbs = list(
+                feature_lambda(
+                    gene.sub_features,
+                    feature_test_type,
+                    {"type": "RBS"},
+                    subfeatures=False,
+                )
+            )
+            rbs_sds = list(
+                feature_lambda(
+                    gene.sub_features,
+                    feature_test_type,
+                    {"type": "Shine_Dalgarno_sequence"},
+                    subfeatures=False,
+                )
+            )
+            regulatory_elements = list(
+                feature_lambda(
+                    gene.sub_features,
+                    feature_test_type,
+                    {"type": "regulatory"},
+                    subfeatures=False,
+                )
+            )
+            rbs_regulatory = list(
+                feature_lambda(
+                    regulatory_elements,
+                    feature_test_quals,
+                    {"regulatory_class": ["ribosome_binding_site"]},
+                    subfeatures=False,
+                )
+            )
+            rbss = rbs_rbs + rbs_sds + rbs_regulatory
+
+            # If someone has already annotated an RBS, we move to the next gene
+            if len(rbss) > 0:
+                log.debug("Has %s RBSs", len(rbss))
+                ignored_features.append(gene)
+                continue
+
+            sds, start, end, seq = sd_finder.testFeatureUpstream(
+                feature, record, sd_min=lookahead_min, sd_max=lookahead_max
+            )
+
+            feature_id = get_id(feature)
+            sd_features = sd_finder.to_features(
+                sds, feature.location.strand, start, end, feature_id=feature.id  # start/end are the search-window bounds, not the CDS bounds
+            )
+
+            human_strand = "+" if feature.location.strand == 1 else "-"
+
+            # http://book.pythontips.com/en/latest/for_-_else.html
+            log.debug("Found %s SDs", len(sds))
+            for (sd, sd_feature) in zip(sds, sd_features):
+                # for/else: the else: block below runs only if the loop never hits
+                # `break`. We break after the first hit when top_only is set, otherwise
+                # after the last hit, so the else: block runs only when sds is empty.
+                table_output.write(
+                    "\t".join(
+                        map(
+                            str,
+                            [
+                                feature.id,
+                                feature_id,
+                                feature.location.start,
+                                feature.location.end,
+                                human_strand,
+                                sd_finder.highlight_sd(seq, sd["start"], sd["end"]),
+                                sd["hit"],
+                                int(sd["spacing"]) + lookahead_min,  # window stops lookahead_min short of the CDS, so add it back
+                            ],
+                        )
+                    )
+                    + "\n"
+                )
+
+                if add:
+                    # Append this SD feature to the gene feature
+                    gene.sub_features.append(sd_feature)
+                    # Pick out start/end locations for all sub_features
+                    locations = [x.location.start for x in gene.sub_features] + [
+                        x.location.end for x in gene.sub_features
+                    ]
+                    # Update gene's start/end to be inclusive
+                    gene.location._start = min(locations)  # NOTE(review): writes FeatureLocation's private attributes directly
+                    gene.location._end = max(locations)
+                # Also register the feature with the separate GFF3 output
+                sd_feature = fix_gene_boundaries(sd_feature)
+                gff3_output_record.features.append(sd_feature)
+
+                if top_only or sd == (sds[-1]):
+                    break
+            else:
+                table_output.write(  # no SD found: emit a placeholder row with SD "None" and spacing -1
+                    "\t".join(
+                        map(
+                            str,
+                            [
+                                feature.id,
+                                feature_id,
+                                feature.location.start,
+                                feature.location.end,
+                                human_strand,
+                                seq,
+                                None,
+                                -1,
+                            ],
+                        )
+                    )
+                    + "\n"
+                )
+
+        record.annotations = {}
+        gffWrite([record], sys.stdout)  # the full input record (with any SDs added under `add`) goes to stdout
+
+        gff3_output_record.features = sorted(
+            gff3_output_record.features, key=lambda x: x.location.start
+        )
+        gff3_output_record.annotations = {}
+        gffWrite([gff3_output_record], gff3_output)  # the SD-only record goes to gff3_output
+
+
+if __name__ == "__main__":  # command-line entry point
+    parser = argparse.ArgumentParser(description="Identify shine-dalgarno sequences")
+    parser.add_argument("fasta", type=argparse.FileType("r"), help="Fasta Genome")
+    parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 annotations")
+
+    parser.add_argument(
+        "--gff3_output",
+        type=argparse.FileType("w"),
+        help="GFF3 Output",
+        default="shinefind.gff3",
+    )
+    parser.add_argument(
+        "--table_output",
+        type=argparse.FileType("w"),
+        help="Tabular Output",
+        default="shinefind.tbl",
+    )
+
+    parser.add_argument(
+        "--lookahead_min",
+        nargs="?",
+        type=int,
+        help="Number of bases upstream of CDSs to end search",
+        default=3,
+    )
+    parser.add_argument(
+        "--lookahead_max",
+        nargs="?",
+        type=int,
+        help="Number of bases upstream of CDSs to begin search",
+        default=17,
+    )
+
+    parser.add_argument("--top_only", action="store_true", help="Only report best hits")
+    parser.add_argument(
+        "--add",
+        action="store_true",
+        help='Function in "addition" mode whereby the '
+        + "RBSs are added directly to the gene model.",
+    )
+
+    args = parser.parse_args()
+    shinefind(**vars(args))  # argparse dest names match shinefind's parameter names one-to-one
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_phageqc_annotation/test-data/AY216660.fasta	Fri Jun 17 13:00:50 2022 +0000
@@ -0,0 +1,815 @@
+>AY216660.2 Enterobacteria phage T1, complete genome
+GCCTGCAATATGGTAAAATAGCACTAAATGTTAAACAAAGAGGATGTATTTATGAGTGAA
+CCTAAGAACGCTCCCGTAGTCCAGGGTGGTAATTTCAAAGAGCTATACAAGAAAAAGTTT
+GGCACTGTACTCGCGAAAAACCGGGCTATGACGCCAGAGCAACTATTCGATCTGTCAGTG
+AAGTATTTCGAATGGGCCGAGGACAATGCGATCAAGGCGTCAGAATCAGCCAGCTTTCAG
+GGTGGCGTTTATGAGTCGCTTGTCCATAAGCCGCGCGTCTTCACCTGGACCGGATACCGA
+CTATTCATCGGTGCAAGCGAGGCTGCAATCATTAAGTGGAAGCGAGAGGAAGAATACAGC
+GAGGTTATGGAGTTTGTGGAATCGGTAATCAACGAGCAAAAATTCCAGCTTGCCGCCAAC
+GGTGTTATTAATGCCTCCTTTATCGGTAAGGATCTCGGAATCGATAAGCCAGCCTCAATC
+AATATCGAAAACTCGTCAGCTTCCGCATCGACAGTAGTCGCCACTACTGAGGATGCGATG
+AAAGAGGCGGTAAACAGCATTCTTGATATGCTTTAACTTTAGGGGCGCGAGAGCGCCCAC
+ATGGGAGACTTAATCATGATTCAATGGGAAGACCTTAACGCAACGCAGAAGTTAGCGATC
+AAGAAAATGAGCGAGGCCAATTTCGAAAAAATGATTCGGATCTGGTTCCAACTTATGCAG
+GCGCAGCAGTTCCAGCCTAACTGGCATCACCTTTACCTATGTCACGAAGTGGAGGAAATT
+ATTGCAGGGCGGCGAGGGAATACAATCTTTAACGTCACACCAGGTTCCGGTAAAACTGAA
+GTGTTCTCAATTCACCTTCCGGTGTACGCAATGCTTAAGTGTAAGAAGGTGCGAAACCTT
+AACGTGTCGTTTGCTGACAGCCTGGTTAAGCGTAACAGTAAGCGCGTCCGTGAGATTATC
+AGCAGCAACGAATTTCAAGAGCTATGGCCTTGCAAGTTCGGTACATCGAAAGATGAGGAG
+ATGCAGGTTCTTAACGAAGATGGAAAGGTTTGGTTTGAGTTGATATCCGCAGCGGCTGGC
+GGTCGTATTACTGGTTCGCGTGGTGGCTACATGACGCCGGGATTCTCGGGGATGGTAATG
+CTAGACGATATCGACAAGCCTGATGATATGTTCTCAAAGGTTAAGCGTGAGCGTACGCAC
+ATGCTATTGAAAAACACCATCCGTTCCCGTCGTATGCATAACGAGACGCCTATTATTGCA
+ATTCAGCAGCGACTACATGCTCAGGATTCAACCTGGTTCATGATGAATGGCGGTATGGGT
+ATTGAGTTTGACCAAATCTCAATACCGGCGCTGGTGACGGAAGAATACGGAAAGACACTT
+CCTGATTGGTTGCAGCCTTACTTTGAGCGTGATGTTTTATCGTCTGAGTATGTAGAATTG
+GATGGCGTTAAGCATTACTCTTTTTGGCCAAGCAAGGAAAGCGTTCACGACCTGTTGGCG
+TTACGAGAAGCAGATCAGTATACCTTTGATTCTCAGTATCAGCAGAAACCGATCGCGCTG
+GGTGGCTCCGTGTTTAACTCAGAGTGGTGGACTTATTACGGCAGCAGTCTTGACGCTGAC
+GAGCCAGATCCGGGTAAATATGATTACCGATTCATCACTGCCGATACCGCTCAGAAGACA
+GGCGAGCTAAACGACTACACGGTATTTTGTTTGTGGGGCAAGAAGAATGATAAGGTTTAC
+TTTATCGACGGCATTCGCGGAAAGTGGGAAGCGCCGGATATGGAGAGGCAATTTACAGCT
+TTCGTCAATCAGGCATGGAGGCACAATAAATCAATGGGGGTACTTCGTAAAATTTATGTG
+GAAGATAAGGCGAGCGGTACGGGCTTAATCCAGAACCTCAGGAAAAAGACCCCGATCTCA
+ATCACTCCATTGCAGCGTAACAAAGACAAAGTTACCCGAGCTATGGATGCTCAGCCAGTT
+ATTAAAGCCGGGCGCGTGGTTCTGCCAGAAGAGCACCCTATGCTTGCTGAAATTATCGCT
+GAACACAGTGCCTTCACTTACGATGACACCCATCCGCATGATGATATCGTCGATAACTTC
+ATGGATGCGGCGAACATCGAATTGCTGACCATTGATGATCCTATCGAGAGAATGAAGCGA
+CTCGCCGGGATGGTTAAGCGGTAATAAATGAGATATAATTAGGGCTGTCAATTGACGGCC
+CTTTTTATTGGAGGAAACATGAAAATTGTTAAGCATGATGGATATAACGATATCTTTAAC
+GGCGGCGCGGACGGATCGCCTAAGCCATTCTTTATGTCTGATGCATCATATCACGTCGGT
+TCTTTCTACAACGACAACGCAACCGCGAAGCGAATTGTGGATGTTATCCCGGAAGAGATG
+GTGACGGCTGGTTTTAAAATGTCAGGCGTTAAGGATGAAAAAGAGTTCAAGTCTTTATGG
+GATAGCTACAAACTTGATTCAAGTCTGGTGGATCTTCTTTGTTGGGCACGACTTTACGGT
+GGCGCGGCGATGGTGGCAATCATCAAAGACAACCGGATGTTAACCAGTCAGGCAAAGCCT
+GGAGCTAAACTTGAAGGCGTCCGAGTTTATGATCGATTTGCTATCACTGTTGAAAAGCGA
+GTCACCAATGCAAGATCTCCTCGCTATGGTGAGCCTGAAATTTACAAGGTATCCCCTGGC
+GACAATATGCAGCCGTACCTGATTCATCACTCAAGAGTCTTTATTGCTGATGGTGAACGA
+GTGGCGCAACAGGCAAGAAAGCAGAATCAAGGATGGGGAGCTTCGGTATTGAATAAGTCA
+CTGATTGATGCAATCTGTGACTATGATTACTGTGAATCTCTGGCTACTCAGATCTTGCGA
+CGTAAGCAACAGGCTGTATGGAAGGTCAAAGGTCTTGCCGAAATGTGTGATGATGATGAT
+GCTCAGTATGCCGCGCGCCTGCGACTTGCTCAGGTAGATGATAACTCCGGCGTAGGCCGT
+GCGATCGGTATCGATGCTGAGACTGAGGAATATGACGTTCTCAACTCTGATATCAGCGGA
+GTCCCTGAGTTCTTATCAAGCAAGATGGACCGCATCGTCTCCCTATCCGGGATTCATGAG
+ATTATCATCAAGAATAAGAACGTAGGCGGCGTATCAGCGAGCCAAAACACAGCGCTTGAG
+ACTTTCTATAAGCTAGTCGATCGCAAGCGCGAGGAAGATTACAGGCCGCTTCTTGAGTTC
+TTGTTGCCGTTCATTGTTGATGAGGAAGAGTGGTCGATCGAGTTTGAGCCTTTGTCTGTT
+CCGAGTAAGAAAGAGGAATCAGAGATCACGAAGAATAACGTTGAGTCAGTCACGAAGGCT
+ATCACTGAGCAAATCATCGATCTGGAAGAAGCTCGCGACACGTTGCGATCCATTGCCCCT
+GAGTTCAAACTCAAGGATGGTAATAACATCAACATTCGCGAACCGGAAGAAACAACCGAA
+CCGGAGCCGGGATTAGGGGAGAAGTTAGAAGATGAAAATTAATGGCGTTGCAACACAGTG
+GCGCTATCCTGAAATGAGCGAGCGCGCAATGTCGCGCTCCCTACAGGATGTTGCAGCCAA
+ACTAACTGAAAAAATGCGTGACGAATTAAAGCCGATGAAATTTGACGCTACCGACGAAGA
+GATAGATCAGACAGAGAGGTCATTGCTTGATTACGTCGAATCACTCATCGCTCCGATTAT
+TGGTTCTCTATCATCCGTTGCGCTCACGATCTATAAATTCAACTCTAAGCAGTGGCTGCG
+CATCGCTCGCAATGCTGGAGGTAAGAAGAATCAAGCCGTGATGCTACTTGCCCTGATTGG
+TCCTACCGCTGCCGAAAGCTGGTACTCAGGACAATATAATCTGTGGCGATCGCAGGTTGC
+TACTTCTATCAGGAAATTTGCCGCCAACATGGTTACTGATTTCACTGATAAACTTCGTGC
+GGCATCCGGTCAGGGTAAAAGCAAGGATTTTGTTGTTGAACTTGCTAAGGAGCGATTTGG
+TATTTACCGGAACTGGGCCAAAAATAGAGCGTCGGGAATTGTCGGAACCTGGAACAGTAG
+ACTGATGCGTCAGCGCATAAAAGACGCTGGTGTCTCTTACTATTTCTGGCGCGGGGTGAT
+GGATTTACGCGAACGTGAAAAACATGTAAGATGGGAAGGTAAGCGCATAGCGGTAGATTC
+CGATCATGTATTCCCTGGTGAGGAATACAACTGCCGCTGTTGGGCTGTTCCAGACTTTTC
+TACAGGAGATTAAAAATGAAGGCAAAGCAAAGATTCGATTCAGTAAAAATCAAGGCGCAC
+TTTGATGATAACGGTTTTTTAGTTGACCGCCCAATCGTGGCGCGAATCGGCGCTCAGGTT
+TACAAAACGCCGCACGGCGATCGAGTTGAGTTCCGTCCGGCGTCCGAAGTTTTCAAGCAA
+GACTCCTTGCAAAGTTTTGCGGGTAAGCCAATTACTGTCGGTCACGTAACGGTAACTCCG
+CAGAATGCTAAGGACGTTGTTGTCGGATCGTGTGCTGGCGCTGGTATTGCTTCAGGGGTT
+GGCGTTGAAGTTCCTTTGAGTATTTACAGCGACTACGCGATCAGCAAGGCTAAAGCAAAA
+GAAGCAGGTGAATTATCTGTTGGTTATACTTCGGTTGATATTGATAAGCCTGGTTGGGGT
+TCAAATGAGACTGGAGAATATATCTTCGAAGAGGATATGAAACAGGACGAAGCGCCGCCT
+GAAGGTTGGGTGAGATTCGACGCGGTACAAACTAATATCAAGGTCAACCATATTGCCCTA
+GTTTTTAAAGGTCGTGCGGGAATTGCTAAATTAAATCTTGATGCCGAACAGGAGTTCCCG
+TATGATAATAACGTTCAATTAACTAACGAGGACAAGCAAATGAAAAAAATTAAGATCGAC
+TCAGTTGATGTGGAAGTAACCGAAGACGTTGCGAACCATATCGAAAAATTAACCGCGCAG
+ATTGCCACCATTCAGGGGAAAGCTGATGGCTTCGAAGCTGAGCGCGATGCGCTGAAGGTT
+AAGGTTGACTCTCTGCCGGAACTTGTGAAGGCCGAGGTAGAGAAGCAAAAAGCCGATGCC
+GCCGCACGCGCAGAAGTTACCGCAGTAGCAGAAACCGCAGGCGTCAAACATGATGGTCTT
+GATATCAAAGACGTCAAGATTGCCGTAGTTAAAGCCATGCTTGATAAAGATGTTAGTGAA
+AAATCAGACGCATATATCGACGCTATGTTTGATGTTGCTAAAGATTCTGATATCATGGCT
+ATTCAGCGTAAAGCAGTAAAAGGCGACTCTATCGAAGGCGGTAAGCCGGAAGAGAAAAAC
+GACGCCGCGCCTGTTACGCCAAATTCACGTTTAAGCAAAGTAATGTAAGGGGAAATATCA
+TGGCACAAATTAATGCATCTTATCAGCGAGATATGGCGATTGCGCTTCCGGGTATGGTTG
+CGGATACTTCAAAGTACAATATTGACGGCGCTTGTGTCGTTAATGAAGGTGATGTTCTTG
+TTGGCGCTGCCGTACAAGTTGTTCAAGCTCAGGCGGTTGATGGTCATAAGTTGGTTAAGG
+CTCTTACTACCGGAACCACTCCTTACGGCGTGGCAATCCGATCTCACTGGCAGACTGTTA
+ACGCTCAAAATCAGATGATTTACGAAGATGGCGGCGCTATCAACGTGATGACTTCAGGCC
+GAGTATGGATGCTTTCCAAATCCACCGAAGCGCCAACTTTCGGCTCTGCCGTTAAACTTG
+ATGTTGATGGTCAGGAAAAATCTGATGGCACGATCGAAACAACCTGGACCTACGCTGGCG
+GTTGGACTAAATACAAAGATATTCAGCTTGTTGAGGTTCAGTTGCATCAACTGTAATTAG
+CGTTTAATATGGGGACTATCCTTTTTTGGATAGTCCTTTTTTTATGGAGAAATCATTATG
+GCTTACGAAAATTTAATGTTGCGCCCGGCGTGTCCGGGAAATCTTTCTGATACTTCAACC
+TACAATATTGATGGCGCTTGCGTGGCTCAAGGTGACATTGAGTTCGGCTCAGCGGTTCAG
+GTTGTCGGCATCGTTGATGGTGTAAAAGTTGTTACGGCGCTTTCTGATGGTGGAACTCCT
+TACGGTATCGCTTTCCGTTCCCAATATGAACACCTGAGCGGTAAAATCCTCGACGGTGAA
+GTGTGCAACGTCGTTTCTCACGGTCGCGTGTGGGCGCTTACTTCTCTTGATGAGGCTCCC
+AGCTTGTTCTCAAAGTTGCAGTTTGGATCTGGTGGGGTTGTTACTGGTGGATCTGGTTAC
+GCAGGATGGACCTTTGCTGGCGGCTTTGTTAAGCACGAAGATGGCTACATTATTGAGGTT
+CGGGTGAAACAAAATGCTTTCATCGTTCCACCGCCGCCGCCCCCTGTCGTTCTTGTTGAA
+TCCGCTACAATCACCACTGACAAGGAAAGCCCTCAGCCAAACAACGTTACGATCCAGTGT
+GTAGCTAATGCTCTTCCGGCTAATGCAACTGATAAGACTGGCAAATGGTCAATCGACGCT
+ACCAATATCGCCACTGTCAATCCGGACTCAGGTCTTGTAACTCCTGTTGGTGGAGAGGTA
+GTCGGTGATTTCAATATTACCTGGACGGCTAACGATGCCAGCAAGACGACGGCAACCATT
+GCTTATCGCGTAGAAGCAGTGCCAACGCCAGAGGTTGATGTATAACATAAAAACACTTTG
+ACGCTTTAACAAAAAGTGCTATTATTGAAGCCGTGAACATAATCACGGTTTTTTATTAAC
+TATGGAGAAGTAATCATGACTACTAAAAAATTTGATGAAGCAGATAAAAGCAATGTTGAA
+ATGTATCTGATCCAGGCTGGCGTAAAACAGGATGCGGCCGCAACGATGGGTATCTGGACC
+GCTCAGGAACTACACCGCATCAAAAGCCAGTCCTATGAAGAAGACTACCCGGTCGGCTCA
+GCTTTACGCGTATTCCCGGTTACAACCGAGCTTTCTCCGACCGACAAGACGTTTGAGTAT
+ATGACCTTTGATAAGGTTGGTACGGCTCAGATTATCGCAGACTACACCGACGATCTTCCG
+CTGGTTGATGCCCTGGGTACTTCTGAATTTGGCAAGGTGTTCCGTCTTGGTAACGCGTAC
+CTGATCTCAATCGACGAAATCAAAGCGGGTCAGGCAACTGGTCGCCCACTGTCAACCCGT
+AAGGCGAGTGCGTGCCAGTTGGCGCATGATCAGCTTGTTAACCGCCTGGTGTTCAAAGGT
+TCCGCGCCGCACAAGATTGTGTCCGTGTTCAACCATCCGAATATCACCAAAATTACCTCT
+GGTAAGTGGATTGATGTATCTACTATGAAGCCGGAAACTGCGGAAGCTGAGCTAACTCAA
+GCGATCGAAACCATCGAGACGATTACTCGTGGTCAGCACCGCGCAACCAACATCCTGATC
+CCGCCTTCCATGCGTAAGGTTTTGGCGATTCGTATGCCTGAGACAACCATGTCTTACCTG
+GACTATTTTAAGTCTCAGAACTCCGGTATCGAAATCGACTCTATCGCAGAGCTTGAGGAT
+ATTGACGGCGCAGGCACCAAAGGCGTACTGGTGTACGAAAAGAATCCGATGAACATGTCC
+ATCGAGATCCCGGAAGCATTTAACATGCTGCCAGCACAACCGAAAGACTTGCACTTTAAA
+GTGCCTTGCACCTCTAAGTGTACTGGTCTTACAATTTATCGCCCGATGACTATTGTCTTA
+ATCACTGGCGTGTAATATTATAGGGGCTAACTTAGTTAGTCCCTTTTTTTATTGGAGAAA
+TCAAAATGGCTAAAGAAAAAACTGTTGTTATCGTAAACGTTGGTGTAGCTCTTCAGATGT
+TCCGTCTTGAAGATGGTTCCTTTGCTAAAGTTCTTCCAGATGAAGAGGTCACGCTTCCGG
+CGTCCGTTCTTGATTTACCTGGTCTGCGTTGCTTAATTGCTCGCGAAGAAATCGAAGTTA
+AAGACGACAGTGCAACCAACCGCAAAATCCGCGCTGAAATGGCAAAGATCACGAAGCCAG
+ATCCGTGGGATAAAATGAGCGTAAAAGAGCTTGAAGACGGCGGCGAATATTAATCATCAA
+GGCGCTCATGTAGCGCCTTTTTTTATGGTGGTAAATTATGAATCAAGAAACTTTAATTGC
+AGTTGTTGAGCAAATGCGAAAGCTGGTTCCGGCACTTCGTAAGGTTCCAGACGAAACGCT
+TTATGCGTGGGTAGAAATGGCTGAGCTTTTTGTATGCCAGAAGACCTTTAAAGACGCATA
+CGTCAAAGCGCTCGCTCTTTATGCATTGCACCTTGCTTTCCTTGACGGGGCGCTAAAAGG
+TGAAGATGAGGATCTGGAATCGTACTCACGACGAGTTACGTCATTCTCCCTGAGCGGTGA
+ATTTAGCCAGACTTTCGGAGAGGTTACAAAGAACCAGTCAGGAGACATGATGCTTTCGAC
+GCCGTGGGGTAAGATGTTCGAACAGCTTAAAGCGCGACGCCGTGGTCGATTCGCATTAAT
+GACAGGACTCCGTGGAGGATGCCACTAATGAACTACTCACAGATTGAAAGGATGGCTCGC
+AAAGGTGTGGCTTTCTTCACCGATCCGTCAAGACCTATGAACCTGATAAAGCAAGGTGAA
+TACGGATATGATGAAAACGGATTCGAGATCCCACCGATGGAACAGGTTATTCCAATATCC
+GGCGCGACGCGAAGACCGAACGCGCGTGAGATTGACGGGGAAACCATCCGCGCCTCAGAT
+ATTTTGGGGATCTTCAATAATGATCATGAAATAAACGAAGGTGACTATATAGAGATTGAT
+GGCATTCGTCATGTTGTCGTTGATGCTCGCCCGGTTCAGGCGTCACTGGAACCAGTTGCC
+TATCGTCCAGTATTGCGGAGGGTATCAGTCGGTGGCTAATTATCAGATTCGTAGATTTCA
+AGGCGAGATTGATGCGTGGATTAATGCCGCTGAAAGCACGTTAGAACATGCTATTGAGAT
+ATTCGTAAGGGATGTTCACGACGCTCTTGTTAGCCGCTCCCCTGTTGATACAGGTCGATT
+CAAGGGTAACTGGCAGATAACTTTTAACGAAATCCCTAACCACGCATTAAACCGATACGA
+TAAAACTGGCGGTGTCGTCAGGGGTGAGGAACAGGCAAAAACTTATGGCATGTTCAGCCG
+TGGCGGCGCGATAACATCCGTTCACTTTTCAAACATGTTGATTTATGCAAACGCTCTTGA
+GTACGGTCATTCACAGCAAGCACCGAGCGGCGTTGTCGGTCTTGTGGCGTTAAGGCTTAG
+ATCATATATGGCTGACGCAATCAAGCAGGCAAGGAGACAGCAAAATGCACTATGAGTTAT
+CAGCGGCGGCGCGAGCCGCTTTTCTATCAAAGTACAGAGACTTTCCTCACTACATGGAAA
+ACAGAAATTTCACACCGCCGAAGGATGGCGGGATGTGGCTGAGGTTCAACTACATTGAAG
+GGGATACGCTTTATCTATCCATTGACAGAAAGTGTAAATCTTACATCGCAATCGTTCAGA
+TCGGCGTAGTGTTCCCTCCAGGCTCCGGCGTTGACGAAGCAAGATTGAAAGCAAAAGAGA
+TTGCTGATTTTTTCAAAGATGGTAAAATGCTTAACGTTGGTTATATTTTCGAGGGTGCAA
+TCGTGCATCAAATTGTTAAACATGAAAGCGGGTGGATGATTCCGGTTCGCTTTACAGTAC
+GAGTAGACACAAAGGAGACTTAATTATGCACTTACCAAATGGCGCACAAATTTTCGTGGA
+AACCTCTCGCGGGGTAGAGGTTGAGGCAACCGCTATCACTAACGCAGAAAATCCTGTTGC
+TACAGTTGCATCTAAGGGTGACTTGGCAAAAGGTGATTACGTTATTGTAACTCAGTCAAC
+TTGGGCAAAGATGGTTAGTCGAGTGCTAATTGTTACTGACGCTCAGGAAACAAGTATCAC
+TCTTGCTGGAATTGACACCTCCGATACTCTTGTTTTCCCGGCTGGCGGCACGATGAGCTT
+TGCAAAAATTACTGGCTGGACTGAGATCCCTTGCGTACAGGAGATTGGTCAGGACGGCGG
+CGAGCAGCAGTATTACACTTATCAGTGTTTGTCCGACGATAAAGAGCAGCAGATCCCAAC
+GTTTAAATCTGCGGTCTCGCTAACTTACACCTTCGCGCACGAATTTGATAACCCGATCTA
+CCCGATTCTGCGCAAGCTGGATTCGTCTGGTCAGGTAACAGCGGTTCGAATGTACGTTCC
+GAAAGCGAGCGAAATGCGCATGTGGGCTGGCATCTTGTCGTTTAACGATATCCCATCCAC
+GCAGGTTAACGAAATGGAAACGGTGGAACTCGCCGTATCCCTGAAAGGTGACTTTACTTT
+CATCTCATCCACTCTGGCATCGCCTGGTGCTTAAATACCATCCACAGGGGGCTTGCACCC
+CCTTATTCATTTCTGTAAAATCATCTTATCAACTTTATTCGATTAACTTTTAACAAAAAG
+TGCTATCAACCAATCAGGAGAAACATCATGGCTAAATTCAATTTCGTGTTGGGCCAGCTT
+CCAGACTTCAAACTTCCGGTGACGTTCACCATGCCAAACGGCGAGGATGCGACTATTATT
+TTTACAGTACGCCACCTTTCCAGTAAAGAAGTGCAAGATATGTATGCGAAGCAGGGCGAA
+ATGAATGATAGCGATTTCATCACTAAGATAGCATCAGGATGGAATCTGGAAGAAGAATTT
+AACGAAGAGAATACGCGTAAGCTGGTACAGTATTATCCTTCCGCAGCGTACAATCTGACG
+GCAACTTACATCAAGGCGCTCGCCGGACACCGCGCAAAAAACTAAAAAGGGCGGTTTATC
+TGTTATATCAGAAACCGCCAACAGAAGAGCAATTAAGATCGGTTGGCCTCAGTCTTTCTG
+ACTATGAAGACGAGGAACCGGAAACGATAATCGGCGATGCTGAAATGGTGAAGGCGTGGA
+ATGTTTTTACGTCAATGCTCACTCAGTGGAGAAGTTCAGGCGCTGGAGCTTATGGTCTTG
+ACTATAATGTTTTGCCTATGTTGTTCAAAATCTATAAAATAGAAGATGAAGAACTGGCAT
+TGCAGGACGTTAGGATCATGGAAGCGAAAGCGCTTGAAATGATTGCTAAGCAAAACAACT
+AAGCCGCCGTTTGGCGGTTTTTTCGTATATAGGGGGGGTTATATGGTTGATAAGGTAGCA
+GGTCTATCTCTTGACGTTGACGTGTCAACAGTTCAGCGCGCTGTCAAGTCACTGAAAGAG
+TTTTCAAAGGCCAACGATCAGGCCGCTGATTCTATGGGTTCTTTAATCAATGAGTCAGAG
+GTTGCAAAACAGAAGGCCAAAGAACACGCCGAACAACTCAGGCGCCAGAGAAAAGAGTAT
+GAGGCCGTGGAGAAGGCAATCGATCCTACAGTATCAAAAATGGAAAGGTTGAAGATTGCA
+TCTCAGCAGCTTGATAAACTCTGGCAGCAGGGAGTCGTTCCAGATGAGACTTTTTTCCGT
+TTGGGTGAAATGCTGGATCTGCAAAACGCAAAACTTGCTCGCAGCCGGGCCATGCTGACA
+GAAGAAGGGCAGGCAGCATTGCAAGAGGCGAAAGCAAAAGAGCAGGCGGCAGTGCGTAGC
+AAGGCGTTCATGGATGCCCTGAATGGTCAAGTTAACGCGATCGGTAAGACTCATGCTGAA
+TTGATGGAACTGAAAGCGGCTGAGCTTGGTTTATCGAAAGAAGCAGCACCACTAATCGCA
+AAACTTAAAGATCAAGGCCGGGCTATGAATGCAGCAGGTATTAGCGCCGGGGAATACAGA
+CAGGCAATGCGAATGCTTCCTGCGCAGATCACAGATGTCGTAACATCTCTTGCATCCGGT
+ATGCCAGTATGGATGGTTGCTATCCAACAGGGCGGTCAGATTAAGGATTCGTTCGGCGGG
+ATCGGTAACACGTTTAAAGTGTTGCTGAGTTATATTAATCCGGTCACGGCAGGTGTTGGC
+GTTCTTGTTGGTTCGTTAGGTATTCTAGCGAAAGCTGGTTATGACTCTTACAAATCAATA
+ACTGATATTCAGAATGCGCTTATTGAGACTGGAGGTTATGCAGGTGTTACGGCTGAAGAG
+CTTGATTCAGTGTCTAAAAAGATCGCGCAGACAAGCAACTCAACCATTGGGAGTATTCGC
+GAGATTGTAACGGAGTTGGCGAGTTCTGGTAAGTACACCCGCGAGCAGATCCAGAACATC
+ACTAAGGCTACCGCAGAGTGGTCAGCGTCAACAGGAAAATCAGCAAGTCAAATTATTTCT
+GAGTTCGAAAAAATAGCAAGCGATCCGGTAAAAGGACTGAAGAAGTTAAACGAGCAATAT
+AATTTCCTTGAAAAAGGGCAGCTTACCTATATCGATACATTAAGCCGGACGAAAGGAGAA
+ACTGAGGCTGTATCAGAGGCTACAAAACTATTCGCAGACGTAATGGAAAAGCGAATGAAG
+TCGATCGCGGATAACGCTACTCCTCTGGAAAAGATGTGGAGCGATATTAAACAATGGGCT
+TCGGACGCGTGGGGATGGGTTGGTGATCATACACTCGGGGCACTAAACCTGATTATCGAC
+GTTGTTCAGGGTACAGTGATTCAGGTTAAAATGATTCTTGCGAAGGGTGATGAATACATC
+TCAAACTTTATCGCCTCAGCCATAAAGGCAACTCAGTCACTGCCTGGAATGAGTGACTTC
+GGCGCTGATGTACTGAAGGAGCAGGAGAATATTGTAAAAAGTTCTCGCGACAACTACGAT
+CAGTTAGCTTCAGATCTTGACGCTATTAACGCTCGTGTAGAAAAAGGCGAGATGGGATAC
+ATTGAAGCAATGAGGCAGCGCCGCACCCTTGAAAAGCAGTACAGTGAGGAAACTAAGGAG
+GCAATAAGGAAAGAAGCAGAAGAGATCGAGAAGCGAAACCGAGAACGAAATAAGCAGTCG
+AAAATTGTACGATCACCGACAGAGCAATTCGACAAGGAGTTAATTTCACTCAGGGCTCAA
+CTTAAGGTATTGCAGGAGCATAAGGAGATCGGTCAAAAACTATCAGCACAGAGAAAGGCG
+CTGTTTACAACTGAGGCTACGATCGCTGTTCTTCGCGAAGCTAGTTCTAAGCGCCAGTTG
+TCTGCGGAAGAAAAAGCGTTGCTGGCAAGTCAGGAGAGAGTTATTGAGCTTGCGAAACAG
+AAGGCCGAGATTGGCGATCAGATTGTTAAGCAACAGCAGTTGAATGATCTTACCGATAAA
+TCTCTGAAGTTTGTCAATGAGATGACGGCGGCGACGGAACAGCTTAACGCGTCACGCGGT
+CTTAGTACTCGCGACATGGAACGACAAGCTGAACTAGCTAAAATCACCACTGATTACATC
+AACTCCGGCGGCAGCGAAGGAGACGAGAAACTTCAGAACATGATTAAGGCGCAAAATGAT
+TACTACGCTGCGGAAGATGCCAAGCGAGCTGACTGGTTGGCAGGTGCTGAAAGTGCTTTT
+GCTGATTACGGTGATGCAGCAATGGATATGTACGGCAATGTTAACGAGATCGCGTCAAGT
+GCCCTTAACGGAATGTCAGATATGATGGTTCAATTTCTGACCACAGGAAAAGCGAACTTT
+GAGGACTTTGCGAAAAATATCATCGGCATGATTATAAAGATGATTGCTCAGATGGTAATC
+TTCAATACGATCTCAGGCATGATGGGCGGTAAGACGTGGAGCTTTGCTGGAGGGGCGTCG
+TCTGGTGCTTCTGCGGCATCACAGGCAACCCCTACACCTGCTGCTTCTGTTTTTAGATCT
+GTATCTTCCGGCGGGGCCGCTGTATCACTTGCTGCCGCAGCGGGTAGCGTGGCAACCTCT
+GGATTCAACGCATCAAACTCGGCGCCAAAGGTGGTAAACCATTCAGGAGGTGGAACGGTC
+GTTGACGTTAGCGGAATGGAGGTGAAAGTTGACAACGGTTCAGATCCGAGGGGGATTTCT
+CAGGGCGTGGAAATGATGTTCAAAAAAATGATTCGTGAGTCTTGTTCGCAGGGCGGCGAG
+GTTTATAATTACATTCAGGAAAAAACAGGAGGCTAATAATGGCGACACTTGACACTTTTG
+GTTGGTGTACGCAGGTTCAAGGGGGCGGTGGCTCCCTTACCACTACCAACAGCGACCGCT
+CTATTCAGTTCGGTAACGGGTACATGCAACTTGCATCATCTGGATTTAACACCACGCGGC
+GTGAATATTCAGTCGTCTATGCCGGGGAAGATTTCATGGCTGTTTACGACTTCTGCAACT
+CTCACCGCATTAAGCCGTTCGCATGGACGCCGCCGGACGGTAAGATCGGGATATGGGTAG
+TAAAGCCTAACAGTTTGGGAGCGAAGCCAGTATCGCGCGACGTGATGGAGATTAACGTCA
+CGTTTATGGAGCAATTTACATCTATGGAGTAACGCCATATAACAAAAGCCCGCCTTGCGC
+GGGTTTTTTTGTAGCTGTAGAATGGTTGCAGGTAAACAAGAGGAAAAATCAATGAGCGAA
+AACAAAAAACTTTATGATGAAGAAAGCGGAAAGAGCCTGTTTCACAACTGCCTTCAATCA
+CTATATCCGGGAGAGATAATCACTCTCATCGAAGTTGATGGTAGTAAGTTCGGCGCTCAG
+GTGTACCGATTCCACGGTGAGAATATCCAGTACACTCCAGAAGAAATCATGCAGGCCCAG
+CAAACTGGAACGCTACCGCCGAAGGAAATTACATTCCGTGGCGAGAAATACGGGGCGCGA
+CCGTTCGGTATATCCGGGATCTCGTTTGACAGTTCCGGGAAGGCAACAAAACCACAATTA
+ACGGTGGCAAACATTGATAGTCGCGTATCTGCGATGATTCGTGCATATAACGGACTAATG
+CAAGCTAAGGTGACTATCTGGATAACTCAGCGTGAGCTTATTAACTCCGATGGCTCAATC
+GCTGATGGAGCTTACCGTAAACTGGTATACTATATCGAGCGTCCGAACTATGTTGATAAA
+AGCGTTGCGCGGTTCGATCTCACATCACCTTACGATATGGACGGCATAATGATCCCGTCT
+CGACTCACGCAAAGCGTATGCTATTTTGCACAACGAGGGTGGTATAAAACAGGGAAAGGC
+TGCGGATACAACGGGCAAAATGGTTACTTCGATAAAGACAATAATCCTGTAGACGATCCG
+TCGCTGGATTTTTGCCCGGGAACGGTAACGGCCTGCCGCCTGAGATTCGGCGCAAACAAT
+GAATTGGATTTTGGCGGTTGCGCTGTCGCTTCATTACAGAGGAAAAATCAATGATTAGTG
+CAAAAATTAAACTTGAAATTATGACTCACGCTCAGGAAGAATACCCCCGCGAATGCTGCG
+GGGTAGTCACCCAAAAGGGCCGCGTGCAAAAATACCATCGCATTGATAATGTGCATCGTG
+ATCCCGAGAATCATTTCATGATGGATGCTGTACAATACGCTTGCATTGAGGACGATGCGG
+AATCAACAACAATAGCAATTGTTCACAGCCACACAGGAGACGGGGCTACAACTCTACCAA
+GCGCTCACGATACGTGCATGTGCAACGAGATGGAAGTTACCTGGATTATTGTTAGCGTGC
+CGGAAGGGGATATGCGATTTGTGAAGCCGGAGAAATTGCCTCTGATTGGTCGTCCGTGGT
+CATTAGGATCATTCGACTGTTACGGTCTTGTTATGGCGTGGCACAAAGAGCACGGCGTAG
+AATTGCGCGATCGCCGATTGAATTTTGAATGGTGGAAACCTGAGTACGGAATTAATCTCT
+ATCAGGATTATTACAAGCAGGATGGCTTCGTTGAGATTCCAGATCAGAATAATCCGTCAT
+TCGGTGATATGGTAATCATGCAGATAGGGCAAAACGTTCCGGTATGGAACCATGCAGGGA
+TTTACCTGGGAGATAATCAGATCTTGCATCATGCCTTCGGCAAGCTATCTCGTCGTGATA
+TTTATTCCGGATGGTATCAGGATCATACTGTTTTAATCGTTCGCCATAAGGATCTTAAAT
+TATGAATGATGTAAAAGTAATTAAATTGTCAGGTTCACTTGGGAGACGCTTCGGCGTCTT
+TCACCGTTACGCTGTTGACTCTTACCCGGAAGCCATACGGGCGCTATCCAGTCAGGTTGA
+CGGATTTAAAGAATACATGCAAAGCGAGGTAGGATCTCGTAGCAAGTTTGCAATATTTGT
+GGATGGCGTTAACGTGGGACACCATGAAGAGGAAAAATTCAAGTGCGCGAAAGAGATAAG
+AATCGTACCGATCCCTACTGGCTCTAAGACAGGAGGTCTATTTCAGGTTGTATTGGGCGC
+GGCAATAATGGTTGCAGCATTCTATACTGGCGGCGCGTCTCTGGCTTTAATGGGCACAAT
+GTCCTCGTCTCTGTTTATGATGGGCGGCGCTATGGTGCTGGGCGGCGTGATGCAGATGAT
+TTCACCGCAGCCGGGTGGCGCAAACTTTGAAGTTCAATCAAGCAAGAATAAACCTTCGTA
+CGCGTTCGGCGGTGCTGTCAATACGACGGCGGCGGGATACCCTCTCCCGGTCCCGTATGG
+ATATCGCGCCGGAGGTGGGGCAACTTTCTCAGCAGGTTCTTATGCCGAGGATATGAGTTA
+AAATTAACCCGCCTTGCGCGGGTTTTTTTTCGCCTGTATAATGAGTCCACCGATAAATAG
+CACAAAAAGGTAAACATCATGATTCAAAAAGTGATAAGCGGATCTAAAGGTGGGTCACAG
+AAGCCTCATAACCCAGTTGAGATGGAGGACAATCTAATCTCAATCAACAAAATCAAGATC
+CTGTTAGCTGTATCTGATGGTGAAATTGACGAAACATTCAGCCTGAAGCAGTTGATGTTT
+AACTCAGTCCCGGTGCAAAACGAGGATGGCTCATTTAACTTCGAGGGAGTAAAGGCAGAG
+TTCAGACCGGGGACGCAGACTCAGGAATATATCAAGGGAATGGAAGATAGCTCTAGTGAG
+GTAACTGTAAATCGTGAGGTTACTACCGATAACCCATACACAATCTCAGTAACCAACAAA
+ACGCTGTCGGCAATCCGTATCAAAATGTTCATGCCTCGCGGCGTACGAATTGAAAGTAAC
+GGTGATAAAAATGGCGTAAGAGTTGAGTATGAGGTGCAACAAGCTGTTGATGGCGGCTCG
+TTTGAGACGGTGCTCACCGATGTAATCGAAGGCAAAACAATGTCAGGTTACGATCGAAGC
+AGACGTGTAAACCTACCTAACTTCAACAATCAGGTGATATTCAGAGTGGTTCGGAAAACT
+CCAGACTCTAACGACTCGAACGTTGTTGACGCGATTCAGGTAAAGAGCTATGCCGAGGTG
+ATTGATGCCAAATTCCGTTATCCGCTGACTGGTCTTCTTTTTGTCGAGTTTGATTCGAAG
+ATGTTCCCAAACCAGTTACCTACGATCTCAATTCGTAAGCGCTGGAAGATTGTAAACGTT
+CCGTCAAACTATGATCCAGAATCACGAACTTATAACGGAAATTGGGATGGAACTTTTAAG
+AAGGCATGGACGAATAATCCGGCCTGGGTGCTTTATGACCTGATGATTAATCAGCGTTAT
+GGCTTGGATCAGAAAGAGCTTGGTATCGCTGTAGATAAATGGGCGCTCTACGAGGCTGCG
+CAATATTGCGATCAGATGGTTCCTGATGGGAAGGGCGGGACGGAACCTCGATACCTTTGC
+GACGTGATAATCCAGTCTCAGACTGACGCTTACAAGGTTATCCGAGATATTTGCTCAATC
+TTTCGTGGTATGAGCTTTTGGAATGGTGAGAGCATTTCGGTAATCATCGACAGGCCGCGT
+GAACCTGCGTACATCTTCACTAACGACAACGTTGTTAATGGTGACTTCTCCTACACGTTC
+GCAAGCGAAAAGAGCATGTACACGACGTGTAATGTGATGTTTGATGATGAACAAAACATG
+TATCAGCAGGACGTTGAGCCAGTATTCGATCGTGAGGCTACTCTACGGTTTGGGAACAAC
+GTTACGAGCATTACAGCGATCGGTTGCACACGTCGAAGCGAGGCCAACCGACGCGGGAGA
+TGGATTCTGAAAACTAACCTCCGCAGCACTACGGTAAACTTCGCTACCGGGCTTGAGGGC
+ATGATCCCGACAATCGGAGATGTTGTGGCAATAGCTGATAACTTCTGGTCAAGTAACTTG
+ACAATGAACCTGTCAGGGCGTTTGCTCGAAGTGTCTGGAAGTCAGATTTTCTTGCCGTTC
+CGGGTGGATGCACGCGCTGGTGACTTTATTATCGTAAATAAGCCAGATGGCAAGCCTGTG
+AAGCGCACAATCTCAAGTGTTAGTGCGGATGGTAAGACTATAGAGGTTAACATTGGCTTT
+GGCTTTCCTGTGAAGCCTAACACGGTATTCGCTATCGACCGCACCGACATTGCGTTACAG
+CAGTACGTCGTGACAAAAATCGATAAGGGCGATGATGATGAGGAATTTACCTACAAAATA
+ACGGCGGTGGAGTACGATCCTAACAAGTATGATGAGATTGATTACGGAGTTAACATCGAC
+GACCGACCGACGAGCATCGTTGAACCAGATCAGATCCCTAGACCGAAAAATGTGCAAGTA
+TCCTCAGAGTCGAGAATCGTCCAGGGGATGAGCGTAGAAACGATGATTGTTAGCTGGGAT
+AAAGTTCCGTACGCTGTTTTCTATGACGTCCAGTGGCGAAAGGATAACGGCAACTGGCAA
+AATGTACCGCAGACAGCAAACAAAGAGGTATACGTTGAAGGTATTTACGCTGGCAACTAT
+CAGGTTCGCGTTCGCTCAGTCGCTGGTTCGGGCACGACTTCAGGCTGGTCAAATATCGTC
+GCGGCAACGTTGACGGGTAAACAGGGTGAACCGGGCCGACCGATTAACCTCACAGCTACG
+GATGATGTTGTTTTTGGTATCCGTACAAAATGGGGGTTCTCTGATGGTTCTGGAGATACA
+GCCTATACAGAGTTGCAACAGTCACCGGATGGAACAGTGGATAACGCAAGTTTGCTTTCT
+TTGATTCCGTATCCGCAGCATGAGTATTATCACTCACCAATGCCTGGAGGGAATATTGTT
+TGGTATCGGGTAAGGACGGTTGACAGGATCGGTAACGTGTCTCAGTGGACTGATTTTGTC
+AGAGGTATGGCATCAACAAACGTTGACGATATCATTGGGGAGATTTCTGTTGATATCGAA
+AACTCACCTGGTTACGAGTGGCTTGTTGATAACGCAACAGACAACGCGGCGCAGAACTCA
+GCTAACGCAGAGGCAGCAATAGAAAACGCGCTCGCCAATGACAAAGACGCGATCTACATG
+AAGAAGGAGAACGGAAAACGAAAAGCTGAGTACACGAAATCACTGAAACTTATTGCTGAT
+GAGACGCAGGCACGAGTGACGGCGATCGAGCAATTGAAGGCAAGTTTTGGCGATCAGATT
+AGCGCTAGCAACAGCGAGCTGCGTGAGGTTATCGCAACCGAGACTGAAGCACTATCGCGT
+GAGATTGACCAGCTTAAGGCTCAGATTGGTGACGATATTCAGGCAAGTCTGACTGATATT
+CGGGAGGTTATCGCAACCGAGACTGAAGCACTATCGCGTGAGATTGACCAGCTTAAGGCT
+CAGATTGGTGACGATATTCAGGCAAGTCTGACTGATATTCGGGAGGCTATCGCGAACGAG
+ACTGAGGCTAGAACGCAAGCTGACTTAACACTTAGCGCGCGGCTTGGAAATAACGAGGCG
+GCACTTGCTCAAAAACTAGACTCGTGGAGTAACGCGGATTCTACTGGTGCAATGTACGGT
+GTCAAGTTGGGTCTGAAATACAACGGCCAGGAATACAGTGCAGGCATGGCTATGTCTCTA
+GTTGGTTCCGGAGCTGCGGTTAAGGCGCAGATTTTGTTTGAGGCGTCACGATTTGCCATC
+ATGACTGGAATGAATGGTCAGACTCAGTACCCCTTCGTTGTTGAGAATGGTCAGGTTATT
+TTAAGTAGCGCGATTATCAAGAACGGATTCATCACCAACGCAATGATTGGAAACTTCATC
+CAGTCGAATAACTATGTATTTAACCAGTCCGGATGGAGGCTTGACAAGGGTGGAACATTC
+GAAAATTACGGAAGTGACGGTGAGGGTGCAATGAAGCAAACTAATACCACAATATCTGTT
+AGGGATGCGAGTGGTCGCCTGAGGGTTCAGATTGGCAGGTTGACTGGCTCATGGTAATAT
+CAAGGGCATCGAGAGATGCCCTTTTCTTTTGGAGGATTTATTATGGCGTACGGTATATCA
+ACTTGGGACGCAAATGGCGTTTATAATAACTATGGAATTAAGCCTATTACGGTTGTTGGT
+TGGAACTTTTTGTCAGCAGGCCAGAATTCAGCATCGTTCAGCTATCAAGTGCCTCCTGGT
+ATGCATGTGAACTACGTTATAAGCCTTGACGATGGCGCCATTAGTGGGCCTGGCAGGAAA
+ATTATTGCTAGCGGTAATACGATAACAGTAACGCCAACAAACTCACCTGGGCCAAACGTG
+TACCCATCATCAAACTGTTACTTAATAGCATATCTGGAGAATGATTAATGTCATACGGTG
+CTTTTATAGATGTAAACGGAAACCCATTCATAACCCCGTTATCCACGCCATTCGCTTTAT
+ATGCGAGAGGGGAAATTCAATCAGTAAATGTTAGTGGTTCACAGGTTGCGGAGAGATACG
+TTCGGATACCTACAGGTGTTCCGGTTATAGCTTTTTGCAAAACAACAAACACGCAGCAGG
+GGACCGCGCTTTCAGCCTTTACTTTCAGAAGCGGACCCAATGTTGGAACTGTTTATATAA
+GGGGGACAAATCCAGCAAACCAATCATACACGCTAACATACTACATATTTGCCATATTTG
+AGCAGTCACTACCGAGATGGGGTATGGCAATATGGGATGCGTCAGGAAAGCTAGTGCTGA
+CAAATGAGACAAAAGTCCTTAGTGATTTGGTTACAATCGGCACTCCTGGATACGCTGGCG
+GTGGATTAAACATAGACACAACACTCAGCGGAAGCTACGCGGTTGTTCCAACTATACTTG
+GCAACTATCAAGTTGTTATTGGAAGGTTGCCAACTGGGCAACCAATAATAGGAAACTCAA
+CAGCAGGCAGTTCATGCAGGTACAACGGGAGCACAACGAGAATAAATGCAGCAGCAACCA
+CTGCGGCAGGTCAGATAATGAACACAACGAATAATGGAAATATTATAACAGCAATCAAAA
+CGGCAGCATACGACTAAGCCCCTTGCGGGGCTTTTATTTTATATCGAGCAATCGTGAGAT
+TTGAAGTTTTTCTCTGATACATAGTTGAAGGAGAATGGGTATCCAGCACGCAATACCATC
+TCCTTTCCGCGCATCTTAGATCCAAAAACGTAAACCGAGTACTCCGCGCCGCCTGATTCA
+TAAATTGCCGTGCAAGTGCGCTCAGGCATCGATGAGCAACCAGTCAGGATGAATGCCGCA
+GCGATAATGGTGATTAACTTTTTCATTTGTATGTCCTCGTCGTTAGTGTGATTGCATTGT
+ATGTCGCATTTACTTTTATTGCAATAGAGCGATTACAATTTTTTTCGTGTAACAGGCGTA
+TATTTTTGTAACCGGAATGGGTGTTACAAAATCGCCTCCATCCGACCGCAGGGAGATATG
+ATAAAAACTCTATATAATATATATAGATAGATAATATTTAATTTTAGCTTTATATATATA
+ATTATTGTTGTGTAACAGTTGTATATCGTGTAACAGGTGATTTGATTGATTCGTCAAATT
+TCTATCATGTATGTTCAAAATTTAATCAATCTGGATTCTATTTGTAGGTATCTCTGTATT
+TCTAATAAAAAGCGGTTACAAGTGTTACGCAATAGACAGCGCATAAAAATCTACTTAAGC
+CATTGATTCTGTTGATGCTGGTTGTAACTTGAGCAATATAGACACGCAATTACACACTGA
+TTACATGTATTCGATTGACTAAACGCTGTTAATGGCTATAATGGATTCATCGTAAACGAA
+GGAGATAAACGCAATGTTCCAGGTATTCACATCAAGCCAGCTTTCTAACGACGAGTATCA
+TAGAAACGAAGGTTGGGCGTCAGAGTATGTAAGCGGATCGAGTCTTGCAGAAATTTATCA
+GACCTGCCCTGCTAACTGGAGATTCAAGAAGAACGAGACAACGAAAGCTCTGGAGTTCGG
+TACTCAGTCGCACACCAACTTTGAGAGTCGAGATCTGTTTACTGCAACGTATGCTAGATG
+CCCTGCTCCGTCAGAGTTTAAGGATCTGATTACTTCGCAGGCGGCGCTGGCAGCAAAATT
+AAAATCATTCGGCCTGAAAGGTACATCCGGTAAGCAGTACCCGGACCTCATCAAAATGAT
+GGTTGATTGTGGTGAAGAACTCAACGTTCAATACCTGATTGAACTGATCGCAGAAGCTGA
+GGCCCGTGCTGAAGGAAAACAACTTGTTGACGCGGACAAATACGACGCTTGCATGAAGAT
+GAGAGCCATCCTTGAGCAGAATCCCGATCATGAAGCGTGCATCAACAGTGAAACGGCGCA
+GCGTGAGATTTCAATCTTCGGTGAGATATCCGGCGTAAAAGTTAAGGTTCGACTTGACCA
+TCTGGACTACAAAGAGAATGTTCCAGGTCGTGTCCTGACTGGTTATGATGAGAATGGCGA
+TCCGGTATTTGAAGACGTAATTTTCCCGGAAGCACTGATTATCACAGATTTCAAAACTAC
+GATGAGCGCCAACCCGTTAGAGTTCCCGAGACTGGCATACAATCACGGCTATTACCTGAA
+GATGGCATTGCAGCATGACCTGCTACGACGCGCAATCCAGGCTGGAGCTTTTGAAGGTAA
+CTTCCCGGAAGACATTCCGATCGTGGTTCGATTGCTTGCGCAGGAGAAAAAAGAGCCTTA
+TATCGCACTGGCTTACCGTATGACTATGGAGCAAATCAGGATAGGTCGTAACCAGTACAT
+TAGCGTAGTCCACACTTACAAGGCTTGCTCTGAAATGGATGTTTGGCCTGGGTACGCTGG
+CGACGCAAGCGAGATCGAACTTGAAACGCCATCATGGGTGCGTTACCAAAATAAGTAAAC
+GGCATAAATAGCTAAACAAATAATTAATGAGGTGTTATAATGCACCTCATACACCAATCA
+GGAGAAGTTAAGATGCAATTATCACCAGAAACAAACGAAATCCTTCCCGCACTGTTCAAT
+GCTCGCAATAAATTTGCTAAAGCAAAGAAGGACGCAAAAAACAATCACCTGAAAAATTCA
+TACGCAACTCTTGATGCAATGATGGCTGCGGTTAGTCCGGCGCTAACCGACAACGATATT
+ATGATCCTGCAATCAATGCTGGACACCAGCACTGAAACAACCTTCCATCTTGAAACGATG
+CTGATTCACAAATCCGGGCAGTGGGCCAAATTCTTCATGATGATGCCGATTGCAAAGCGC
+GATCCGCAAGGCGTAGGTTCTGCAATGACGTATGCTCGACGTTACTCATTAGCCGCAGCG
+CTGGGGATTAGCCAGAGTGATGACGATGCTCAGCTTGCAGTGAAATCCGTCAAGGACTGG
+AAAAAAGAACTTGATGCGTGTGAAGACATCGAGTCACTGAAAGATGTATGGGCCAACGCT
+TACCGCCAGACTGACACGGCGAGCAAGTCAATCATTCAGGATCACTACAACGCATTGAAG
+GCTAAATTTGAGATCGGTAAAGCTCGCGGCATTCGCCCGGCGCAACCGGAACAGAAAAAA
+CAGGTTGAAGCAACAAGCGCGAAGCCTGTACAATCCCAATCAATCACCAACTTCGAATAA
+TCATCAGGGCGGCTTAGGTCGCCCATAAAATTTAGGAGAGAAAAACATGCATATTATCAC
+TGGCGAGATCCGCAAAGAACCAAAGATTCTTGAACGTAACGGCGGCAATACTTATATTAT
+CGAACTGGCAGAAAGCTATAAGCCTCGTGATGGCGATCGCGAATACACCAACTACACGTT
+CTTTTTTAGCGACGGTGGGAAGCCAGGCCTTGCTGACTGGTATCGTGAAGCGTTCCAAGT
+TGGTCGAGTTATCTCAGTATCGTGCGAGACGTTGAAGATCTCATCACGCGAACACAACGG
+AATGATTTACAATTCATTGCAGGCGGCTGACTTCCCTAAACTAGTATTTAGTCAACGAGG
+TCAAAGCAACCAGCAACAACGAGCGCCTCAGCAACAACAGCGTTCTCAGCAGCAATCACA
+ACCACAACCAAATCAACAATCAACATTTGACGACGATATTCCATTCTAAAGAGAAGCCCC
+GCATTGCGGGGCTTTTTATTACTTCATCTCAATTGCCTTCGGAAATGAGTCTACGTATTT
+TTTAAGCTTTCAACAAACTCTCCAGATACCATTCTTTCATGATTTCTTTTGCACTCTTCA
+ACCTTCTTGTTGTAAATCGAGTCATTAGGCATCTCAACTCGAACGCTAACAAAAACATCA
+CTCGGAATGTCAATAGGATCTCCGTCGCTATACCCATCCTTATGATTTCTTGCGAATGAA
+GGCGCGTTATCATGAGTTCTATGATAAGTCTTGATAACCAAAGACCCATCAGGGTTAACA
+TCATAATCTACCCACAATATAGGCTGTCCGTTAATGTCTTTTGGTATCTCTATACCACCA
+TTAACACCACCCCACGCTAGATCAGAATTTAGTGATAAACACCCCCTTATCAAATACTCG
+CCAGTAGAAATCCTCTCAACTGTACAACCTTCGCTCTCGTCGTTTGTTCTATGGCTCCCG
+TTGCCAAAAATATCAACGATGGGCGAGGCTCTTTTTATAAATCCGTTAGAGTCAACGGTT
+GTATTGCCTGTATTCCATATAACCTGGGTAGATTGCCATTGCCCCGCTCCTGTCTGTTTT
+CTGAGCCTCACATCCTTATAGTGTGGAGAGGTCATTGATGCGTCTTCAATAACAAAACCG
+AAAGCACTACCATCTCCTTGACCGTTTCTATGACGAACAGATATAGCTGTGACCCATCCA
+ACAGAATTAACATTGTAACTTGCAAGGCTGCTAGGCTGAGATGATAAGGCGTTAACATAA
+TCTACCCAGCTTGATTTCTCGTTAGATGCCGGACTTGATACAACGTTACCGGAAGAACTT
+GCCTGAAACGGTATCCACGGAGAAAACTTTGCATTATTGCCGTTTTTCATCCTTATGTAA
+AAAGTGCCGCGACCCGCTCTTGCGTATTCGTTTTGAGAATTTCCAGATTCTGGGGCAGTA
+CCCAAATAAGTAAAAGGCGTAAATCTCTGAATGCAAGCGCTTGCACCGTTAGCTCCATTA
+GGAAGAACCTCAAGCATACCTGCCGTAGCCTCAGGATAGCCCCTACTAGCGGTAGCTAAA
+GCACTTGATGATTGCATATATATTCCAGGGCCATCACCTTCACCAGTTATTGAATCAAGA
+TGTTTTTCCCCTAAGTTAGTTCTATCAAGTTTGTAAACCTTTAGGCTTGTCCTTGCCTGC
+TCCTCGGTAGTTCCACCAGTGCCACCAAAATTAACCCCTAGAGCCTTGTTTTCACTTCCA
+TCATGAGTCATGGCGCCCCAGTCGCCGCTATCCTTGACGATCAGCTTAAATCCGGTCCTA
+TCGTCATTTGAATAGATTTCCGTTAAATCTCCGCGAGGTTGATTTAATCTATTTACTGCT
+AGATTTTTCTTTGATTGCTCCTTGTCAGCAAGGCCAGAAAGATTTCCGTCCTTAGTTAAA
+AGATTGTCAGCATTAACGCTATTTGCTGCGTCCTGAGCTTCCTGAGCACTTACCGCTGCC
+GCGTCTTTAGCGCTAACCGCCGCATCGCGTGCTGCCTGGGCATCGTTTTTTGCTGAGGTT
+GCAGTTTGCGCCGCAGTCTGAGCATCATTAACATAGCCAGATAGATCGCCTTTAGCGTCA
+TTGATTGCCTGAATGGCCGCCGCCTCTTCAGAGTTAATATGCGTAACGGCTGAATTTTCT
+TTTTGCTGCACATTAGTGATAGCCTTGTCTTTTGCTGCATTAATGCTACCAATAGCGCTA
+TCTGAGATTTGCTGCGTCTGGTTTTTTATTGAGATAGACTCGTCACGCGCTGAATTAGCT
+GAGTCGCGAGCAAGCTCCGCTTGATTCTGCGCAGCTTCTGCGGCTGATTTGTTTGACTTA
+ATTTCTTGAACGATATTGTTCAGGTTGTCCATATCAAGATCGGCAATAATATCCAATGCC
+GATGCGATCTCAGTCTCTTTGCTCTGGTAGTACCGAAGCGTCTCAGCGACGTTTTGCGCA
+AGACCGTTAACGGTCAATGAATCGTTAAGCAAGATCACATACTTACCGTCAGCGGCAGTC
+TGACCATCCGTAGAGATAGCCTTTAGCTCTGTGTCGCTCACAATGTCGCTGATTACCGCC
+AGTTTAATTGGTTGCTCCAGGAATACGATAGTTGCACCGACTCGAATCAGAGCAAGCTGA
+TCTTTCCATTTTGTATCGGTTCCGTGAACCGTACCGTCTGCATCCATTGATGCAGTACCG
+CGTCTATATAAAGCCATAGTATAAACTCCTTAAGTAAATAGCACGAATTGCTAAGCAATG
+ATTTTATCATTGTGATACCTGCCTTGCAATGGGCAATAAAAAACCGCCCGAAGGCGGTTA
+GAGTAGGTCACTTGCTGAGAATAATTTGGCTTGCATTCCTGAGCGGTATCCTCCGGGATT
+CGGTATGATTATTTTTAATTGTCTGTCGTCTGCCATGTACAAGGTATCCTTGTCTGAAGG
+TTCACACGCCACCCTTACCTCTCGATCACCTTTATACCTGTATGCAACCATTTCGAGGTT
+TTCAGGGGTGAAGCAGGCCCATACCTCTTGCTTTGTGTGAAATGCAATATGCTGAGCATC
+AATCCAGTTAAGGCATAAGTAGATCGCCTTTTCAGTTTTACCAGTCACGGCAACCGAGCA
+ACTCGTGTATTTCTTCGCATAAAATGACTCTCTTCCTTCTTCATCAATAATCAAAATATT
+GCAAAACTCATCATCAAGCCCATCCTCGTGCACAAGTTGACACGGTATAGTGTGAAATAC
+GCTCTCTCTTCCATCCTCGCTACGCTTTATACCGACATCGAATGATTCGGTGGGTAGTGA
+CTCAAACACGCTCAACGGCGTATTTACACGTTTCTCTGTGCGCTCCATAACCTTCATCAC
+CGCCTCATGTTCAGCCATCATGACATTGACACCTGACACCGGAGTCCGGCGCGCTTTCTT
+GTTTGCCTTGACGATGTATTCCTGCGGAACCTTTCCGAGAAATCTTCCCAGGATGTTTAC
+GCATTCGCTGTACGGCATACCAGTCAACTTCATCAGCCAGCCAATACCAGAGTCGTTACC
+GCATGAGTTGCAGATCGCACCGCCGTCGCCCGGGGTGTTCAGGTTATCAGTCCAGCGAAA
+TCGGTCTTTACCGCCGCAGTTGGGGCAGGGTTGGTGCTTCTTATTAAAAACATTATTCGG
+CAATCCGCAGATTGATTGGAAAGCCTCGCGCCATAACCCCTTCATGTACGGTAAAACGTC
+CTCTTTCTGAAACATCATAAATTCTTCGTTCACTTCCAGATCTCCAAAATAAAAAACGCG
+TAGAAGGATGTTAACCCGCTACGCGTCGTTTGTTTTAACTAAAAATGCTATTGGTCGTTC
+CGATATCTTCCCTCTTCACGACGCGCATAAATTTCTTGTTCTTGCATCGCTTCTCAAGGC
+ACTTGCCGTTACCGTCAAACTTCAGGTCGAATCGCAACCATGCGGCCCTAAACCCTTTGC
+ACCCCTGTCGGCGGTAAGCACGATGCGCAGCTTCAGCGCCTTGCCATGAAATCATATTGC
+GTTCTTTCCAGCCTTGCACCGTCTGATTGCTAACCTTCAGAGCCTTTGCGCAGGCCGCCG
+GGCCGCCGTAATATTCGATAAGGGCATCAAGTCGCGCTCGCAGTCCGGCGCGCGTTTCTT
+CTTTATGAATATAGAACCCGCAACGCTGGCGAGGCTTTTTATCTTTACCGCGCCGTGTTC
+CGTTGTTACCATTGATGTGACGTTTATCGATTTCACCAGTTGACTCTGCGATACGTTGAA
+TACTCATCTTGATTCCCCTATAGCACTTTTTGCTAAAAACGTTTACTTTATGCCGTGTAT
+TATAGCGTAAACGTTACAACGATTCAAAGGATTAATAGCCGTGACAATGAACATTAAAAA
+ACAGATTGCATTACTTGGCGATGACTATATAAAGAGAACTCAGGAGCGATTTACTGTTGG
+TGAGGTTGTTCCTTATCCGTACCAGGTTGTTGCTTATGCCGAGATCGCGAAACGCCTATC
+AAATTACGAGCATCCATTCTTCGTTAAAGCGTCTGTATCCGCAGGTAAGACAATCATCTT
+CGCTATGGTGGCAAAGCAGTGTCAGAAAATGGGCTTAAAAATGCTTGTCCTGGCTCGTCA
+GGGTGAGATTGTCGATCAGGATAGCGAAGAGATCGACAACTTCGGGGTAACGAACTCCAT
+CTTCTCAGCGTCACTTGGAATCAAGTCCTGCTACTTCCCGATCGTGGTTGGCTCAGAGGG
+TACTGTTGCAAATGGCCTCGACAATGAGTTAGCTGATTTCGTCCCGCATGTAATTGGGAT
+CGACGAATGTCACCAGGTGGATTGGGAAGACCTTGCGCAAGCCATCGAGGGTAAGGAAAC
+AATGGAACAAATGAGGGGCGAGAAAGGGAAAATTATCATGGACGGAGATATTCCCCTGAT
+TGGTAATGATGGAAAGCCTTTGCTTGGAACTAAGCGCAGTCAGTACACGATCGTAATCAT
+GGAAATGATGCGGCGCTGTAAAAAGGTTCACGGTCACGATCTCAGAATATTTGGTATGAC
+TGGATCTGAATTTCGTGGCGTAGTTCCTATTCTGGTAGAGAATCCGAAAGCATTGGGATT
+CTGGCGTGAGCGAGTAACTGATATCGACACAAACTATCTGATTGAGTTCGGCTCTGTCGT
+TCCGACTATATTCGGATCAACAGACGGAGTTCATTACGATCTTGATAAGTTCAAGGCGTC
+TAGCGAGGACGGAGTGCAGGACTTTACAGAGAAAGACATGAAGGCTATGGAAGATGAGAT
+CCTTCATGATAAATCTCTGACTCAGCGAATCATGCAAATGGTCGCCAAAAAGGCAGAAGA
+ACGCAATGCGGTCCTGATTACATGTGCTGGTGTGCGCCACTGCAAAGAGGCAGCGGCAGC
+ACTTCCTCCGGGAAGCACCTATGCAATTATTACTGGCGACACAGACAACAAAGCGCGCAA
+GAAGATTCTTGACGATGTAAGGGCCGGAAAAATTAAATACACCTTTCAGGTAATGGCGCT
+CACTACTGGCGTTAACGTTCCAAATTGGGATTTCAGTGTCATACTCCGCAAGATAGGATC
+GCTCACTCTGTTGATTCAACTTTTGGGTAGGGGTATGCGACTGCTTAAATCCTGGCAGGT
+TGCTGAAGGAATGGTTAAGCAGGACCATCTGGTATGGGATTTTGCAGGTACGATGGATGA
+GCTGGGTCAGCTTTATTTCGATCCGATACTTGAGCAGGCGCAATTCCAGAAGCGTTTTGA
+AAACGGCAAAGATCCGAAAACATGTCCGAAATGCGGTTGCGTAAATAGCTTCTATGCTCG
+ACGATGCGTTAATGTCATTGATGGTGAGCGTTGCGATCATTTCTGGACTTCTCAGATTTG
+TGAGGACCAGGTTGACGAGCGAACCGGGAAAATCCTTGTTAAAGGATGCGGTGCAGAGAA
+TGACGTTGTTGCGCGAGTCTGTCGTTGTTGTGATGCTTCTCTTGTCGATCCTAACCTGAA
+GTTATCCGGTAAGGCGTACACCAAGAATGACTGGTATGAAGTAAAGAATTTTGAGGTTAC
+GCTAACCAAAAACCAGAAAGGCATAATATACAAATACACTCTGATTAACGACGATGGTGA
+TGAGTTCAAGGCGTATGAAAAATTCTTCCCCGAGTCTGACTCTAAGATTTGCGGTACGCT
+ATGGAAAACTAAAGGTGTACTTCCTCATGTGTCAGATCCTAAAATGCGCCGCTACTTTAT
+CGGAATGAAGAACGCCATCAAGATTTTGCAATACTCACATCATATTGCTCACCCGGTGCG
+CGTAACTCATCGTCGCAACCAGAAGAAAGAAGATATCATCTCACGCAAAGACTTCGGTAT
+GGAGGATATCCCGGAATGATTACAGACAAAGGTGATTATTTAGAATTTTACGAGAGAGAC
+ACAAGCGACACTCGAAAGGAGGATGCTCATCAGGTGGATTGTGTATCTTGGCTGAAATAC
+AATTTTCCTCACCTTCTATTTTGGCACACTGTCAATGAAGGTGAAAAAACAATCACATCG
+GCGCTCAGGGATGAGCAGGCAGGATTACTTAAAGGCGTGTCAGACTTCGTTATCCTGATT
+GGTGTTAACTCACGATACCCGTTTGCAGCAATCGAACTTAAGCGGGTTAATAAGTCAGGC
+AAAGGAAAGGCGTCACCAGTCAGCGACAAGCAAAGGGAATTTCTCCAAAAGGTCCGGGAG
+CGTGGCGGCTTCTCTGCCGTCGCATACGGATTCGGGCAATTCAAGATCGCAATTTACGAA
+ATGATGAAATAGCACTTTTTGTTAAAACTGCCGGGATGGAATCTGGCATTATTATCTCAC
+CAAAACGAGAGGAATAAAAATGAAAGACTTTAATGATATCGAAACTATCGACTTTGCAGA
+AACTGGTTGCTCATTCACTCGCGAAGCAATAGCATCAGGCGGTTATTATCAGGCATTGAA
+AACGCCAACCTGTAAAGAGATTTCAGGGCGTCGATACAAGGGGACAAATACCCCTGACGC
+TGTTCGTGATTTATGGTCAACTCCGCGAGAGGTTATTGCATACCTTGAGGGTCGTTATGG
+GAAATATGATCTCGACGCTGCGGCAAGCGAAGAAAATAAAGTTTGCGAGAAGTTTTACTC
+TCAGGAAACAAACTGCTTAAAACGTTGGTGGGGAAAGAATAAGCACGTTTGGTTAAATCC
+TCCTTATAGCCGACCTGATATATTTGTCAAGAAGGCCATTGAGCAAATGGAGCACAACAA
+TCAGATCGATATGCTTTTACCTGCAGATAACTCTACTGCGTGGTTTACTGAAGCGCGGCA
+GAACGCAGCTGAAATAATCTGGATTGAAGCGGACTTGACTGAGGATATTGACGGCAATGA
+ATACGCACGATCCGGTCGCCTGGCTTTCATATCCGGTGAAACTGGAAAGGCCGTAGACGG
+TAATAACAAAGGTTCGGTAATTTTTATTATGCGCGAACTTAAAGAAGGTGAGGTGCAACA
+GACTCACTACATCCCAATCACAAGCATTTGCCCTTCGGTGAAAAACAAACGAGCAAAGGT
+GAGGAAAGTATGATGAGCGAAAAAATGGTTCCTGTTAAATTAACTGAGCAAGGTTTATGG
+CTACTTTATCGAGCTACGTGCTGCGAAATTATGGAGCGAAACGGATTGACTCAGGATGTT
+ATTGGTTGCGATCTGTGGGAGTTCACTAGTTCTCTTGATATGTCTTTCGATGAGATAAAA
+AATGAATACATAGAGAACTGGCCTTCAATCATACAGAAAGACGTGGAAGAACTTAAAGCT
+GATACAATCGTACAGCACTAATTGCTAAAACTACCCGGCGAAAGTCGGGTATAGTTATTT
+CATAGAAACGAAATGAGGAATCAGAAGATGGCACGCATTAACGCAAACTTTTTCAATATC
+GCTCAGCAGTCCGCAAAAATGGCTGTTCATATTACGAACAAGCAAGGCGGCAACTTCGAT
+TGGGATATTGCTATGAACTTCCTTAAAATGTCTTATTACCGTTGCTCAGTTGAAGAAGTC
+GAAGGCTTCATCTCTGACGTGGAGAAATTAACTAATGCTGATAAAAAAGCAAGGTAAGCG
+CGAAGTGTGGGAGCACGCAAAGGAATGCGGCATCTCAGACGATATAGCATTAATTGCTAA
+ATACTTTGATATAAAGGATGTTAGCATTATATCAAACGGCAAGATTTCATTTATGGAAGG
+TATGCCGAGAAAAATGCAAAGAGTTCCAGCCACTCCATCACTTGAGTTTTACCGCGAAGA
+GGGAAAGAGAATTGAGCGAGAAAGAAAATCCACAAAAAACGGCAAGTCTTCCCGGCTTAA
+ATATTAATGCGGACGAATACCAGGCAATATGGATCGGGAAAAAGCAGGTTAAGCAAATCC
+CTTTCTCTGACTGGTTGCCACCTGACTTTGTTAATGTGCTTTGCACTATCGGTATTGAGC
+AGGAGTTGCATATAGGTTACTACTCACCTGGCCGAAACAGTATGATGCTTGAGGTTGACG
+GAAAGCTCGTTGAGTTTAAATCTTCAGATCTAGGATTCTGGTTAAAGGCTGTGGCATGAA
+ACTTTATTTTGCTGTAGTATTAACACCGCTAATTTCATTTTCAGTAATGTATTTCATTAT
+CATGTAAGGATTAAAATATGTCACAAGCTAAAATCACTACCGAGCAACTTATCGAAGAGC
+GCATGAGCGGCCTGACACTTCGCGAGATCGCGGAAAAGTACGGTATGCACATTCGCACTG
+TCGAGGCGCGTCACGCAAAACTTGCAAAAGAAGGCCACTTCCACGGCAACGAGCATGTTG
+CTAAGATGGTTCCGGAAGGCTTCATGGTAAAAGGCACGTCAACCATGATTGACGCGGAAG
+GTAACGAGAAGATTCGTTGGGTTAAGACATCAGTTGATAATGAGCGCCTTGAGGTTCTAA
+TGGAAAAAGCGCGTGAAGCATTCTGTTCAGAGTTGCCTAAGGCTATTCCATCTGAATCAC
+CTGACGTTAGTTTTGATGAAGACACGCTTGCGATGTATCCAGTTTTTGATTTGCACATTG
+GTGCTCTTGCTCACAAACATGAGTGCGGCGAAAACTACGACACAGCGACAGCAGAGAAGG
+TTATGAATGGGTTCTTTGACTACGCTGTAGATAAGGCGCCAAACTCAAAGAATGCCGTAT
+TGGTATTGGGTGGCGATTTCCTACATTACGACTCTTTGGAGTCTAAGACTCCAGCGTCAG
+GCCATTACTTAGATTCTGACAGTCGTTACGCTAAGCTTGTTTATGTCGCAATCCGATCAG
+TACGACGCGCAGTCTCTCGAATGCTGGAGAAGCACCAAGTTATTGATATTAAAGCAATAA
+GTGGGAATCACGACGAATCAGGGATGGTTTGGTTGCGCGCTGCGCTTGCTGCATTTTATG
+AAGATGAGCCGCGCGTAAATGTTGATGTTAGCCCTGCCGCAATGATGATGACCAGCTTTG
+GTAAGACCCTTATTGGATACACTCACGGGCATCAAATGCGAAAAGCAGATACTCGACTAA
+GTGTTATGGCAACTGATTTTCGTAAGTTGTTTGGTCAAAGTGATTACGTTTACACGCATA
+GCGGTCACTGGCACAGTCAAAAGATTACAGAAACAAACTTGGGTATTGATGAGGTTCATG
+GTCAGCTTGGAAGTCCTGACGCATACTCTGCCAATGGCGGTTGGAGGTCTCAGCGTCAAG
+CTGCTGTGATTGTCTATCACAAGGAATTTGGTGAGGTTGGACGATTCATTTGTCGACCTG
+AAATGTTCTAAATAGCACCTTTTGTTAAAACAGTACCCGCGAAAGCGGGTATTATTGTTT
+TATAGAAACAAGAGGAGATTGCAATGAACTGGCACGAGCATTACGAATATAGGGATGGTG
+TTCTATATCACAAGGTAAAGCCATGCAGAAGGCATGATGTAAATATTGGGGATGTTGCTG
+GAAGGGTTGCCAAAAACGGCTATCACTATGTTGTTCACAAGAACAGGCCGTATAAGAGAT
+CTCGAGTTATATGGGAGATGTTTAATGGTGAGATACCAGATGGTTTTGTTATAGATCATC
+TGAATCACAATGCCACCGATGATAGGATCGATAACCTTGAGTGTAAGCCAAGAAGAGAGA
+ATATGGTTAATGTTAAGTTAAGGATTGATAGCACGACCGGAGTAACTGGCGTATCAAGAA
+AGAGGGATAACAAGTGGAGGGCGTACATAACAATTATGGGTAAGCAGAAGTGCAAGAGCT
+TTGACACGTTTGAGGAAGCTTGCGCGCAGAGGATTGAATGGTCAGTAACTCATGATTTTC
+ACCCAAATCACGGTGGAACATACTAATAGCACCTTTTACCTAACCCGCGCCACAGAAGTG
+CGGCATAGTAACCACATCGAAAACAGAGATGCTATATCATGAAGATAGTCAAGTGCATCC
+GAAATGACTCCAAAACACTTCCATTCCGTGTAAATCAGATCTATAGTGTTGGTTATGATT
+TCGGTGGGGGATTATTTGAGATTTACGACGGGCGAGGTTCAGCAATCCAGACTCCTCTGA
+ACGGTCACTACCTGGAATTTATTGAGATAGATTAACAATAGCATTCATCACCTTACAGGC
+TGGCATGATTTACATGCTGGCCTTTTTGCGTTGTGTCAAATAAATTTGAAGGTTAAAATC
+GACTCACTTGTTCAAAAAATATATGGTGAGATTATGAAAGAGTTTTTAACGGCTGCTACG
+TCAAGCACTGGCGGTGCTTCGTTGGTAGGGGCGGCGACAGGGCAACTTTATATTGCTGGC
+GCTACATTCATTTGCTTTCTGCTTTTTGGTGCCTGGGGAGCGTACTGGAAGTATCGTGAT
+AGCAAGGCAATTCAGGAAGCGTTAAACGATGGCGATCTAAATAAGGCGCTTAAGATCAGG
+GGGAGATAATGAGTTTAAAAAATAACGTTATAGGCGCATCAATCGGGGCCGCTTTGACGT
+TGACACCTACCCTACTGGAACGGATCGAAGGTATAGAATACGAGGTTTATTACGATATCG
+CCGGAGTCCCTACCGTATGCAGCGGAATAACCGGGCCTGACGTCATACCTGGTAAGAAAT
+ACACTAAGCGAGAATGCGATGCATTGCTGATAAAACATATCGGCGTCGCTCAGCGATACG
+TTGACAAGAAGGTTAAGGTTGACATTCCGGTAACTATGCGCGCATCACTGTATAGCTTCA
+CTTTCAACGTTGGGACTGGCGCTTTCGGATCGTCTACAATGCTTAAGCTAATCAATCAGC
+GCAAGCACAAAGAAGCGTGTAATCAGTTATGGCGATGGGTATACTACTACAACCCAAAAA
+CCAAAAAGCGCGAAGTGTCGAGAGGGATCAAGAATCGGCGCGCTGAAGAATACGCATATT
+GCGTTAAGGAACTATAATGAAACTTAAGAAAACGTGCATTGCAATTACGGTTGCTGTTGG
+TGTGATTTCTCTATCCGGTTGTTCGACGGCATCTGCTCTGAGTGGTTTACTTTCTGACTC
+CCCGGATGTTACGGCGCAGGTTGGCGCTGAGAACACAAAACAACTAGCAGGAGTAACAGC
+AAAGGCGGATGATAAGCGAGAAGTGAAGGTGAGTGATTCAAATATTGGCAAGATTGACTC
+ATCCGTCAAGAAGTCCGTGGAGGTGTCAACCATTCAGGCCAACACGGTTAACGCTGAAAG
+CATCACAGTAACCAAATCTGGAAGCTGGTACGATCCTGTGGTTTGCTGGATTCTCGTTTT
+TATTGTCCTGTTGCTGTTTTATTTTTTAATTCGTAAGCACGAAAAAAAGGAGGCGTAAGC
+CTCCTTTCTTATTTGTACCTTTTGACGTGAAGTAGCAACTCCCCATCCTGATCGCAAAGA
+TTGTGCTCACCGTCGTTATTGGCCCTCATCGAAGTGAGCAGTAGGCTAAGTAATCCTTGC
+TCAAATTCTTCTTTCGTAAGCTGGAGCCTTGCGCACAATTCGACGTTCCGATCTATCAAT
+GTTTCTACGTTCGCCAAACAAGTCTTCATCACTCATTTCTCCAATGTGCATCATTTCCCA
+CGTATATCGATTGTTGTAGCCATCAATACACATCAGCTTCATCATTACCGGGCGCTTGAT
+CTTTCCCTTGCACCAGTAAAATCCACCCTTGCAATCAAGATAACCCTCAGTCACGCAGCG
+TGCGCAAAACTCCTTAGACAGCGCGCTTGTAAATTCACGGCGAGTCATTCCGGCGGCCTT
+AGCAAATCGCTCACTTTCCTTGTGGGCGTATATAAATTTCGCTATGTGCTGTCGAGTGTA
+TTTGTCGTAACCTTCGCAGAACCTGAACAGATCTAAAAGAAGAAGCATATTATTAACCCA
+TCAGTCGAGGATTGATGAAAACAATATCATCAATCCGGCAAGTGTAGTTCATCTCTTCGA
+GCGTGATCAGCAGGCTGTCGATTCGCTCAGCTACTTTTTGTTGACCGTTGAACGGCGTAA
+CGTTACGGCACTTTGCAACAATGCTATGAATAGGTGCGCGACCTTTGTTCTTCTTTGCGA
+TCTCAGTGATAACATCAATCAGCTTACGAGATTCAGCCTCATCACCAGCATACCCGGCGG
+CGCTGGCAGACGACAGATAGGTTCTGGAAAGCTCATTGAAAATCATGATCGCTTCCTGCA
+TTGTTTCAAGGTCAATCTCACGGTTGGAACGGTTCGGTGATTCACCCTCCCAGTTCTTGA
+TTGTGTGAAGAACTGAAGCAATGCGCAAAGCGTGCTTATCGAACTTGCCGAGATGACCGC
+GTAGCATTGAGTGAGAGTATTTGCCCCCGGCTGCGAAATCCGGCTCCATAGCCTGGCGAG
+CAAGGTTTAATTCACGCATAGCATTACGGCTTACAGAGAGAACAACGTTGTCCTCCTTCA
+TAATGTTGTGCACAAGTCGATAATATTTACTCACCAATCCTCGATCGACTTCCTTATACA
+GTGCATCACCATTTTCATCGCAAAGAATACGAGTGCCTAAAAGAGGTTCCTCGCGAACCA
+ATAGGAAACGCTCAGATACACCGATACCGCGCTGGCCTGCGTCCATGATACCCTTGATTG
+TTTCATCCTGCGCAATGACGCAGATCGAACCGACCGGGCAAAGAGATAAGTTATTGTCCT
+GATTTGAACGCGCAACCTCCATATGGTTTTTATCCCACGCCTTGAGGATAAGCTCGCTGT
+TTGATTTCTTATCAGAACCGCCATAAGTCAGGCCAAGCAATGTATTTATTGCCGTTGCCT
+CATCAGAGATTACGGAAAAGTGGCCTTGAACAGCAGCTACTTTCGCAAGACCTTCCGGTG
+TAGGATCTGATACCGCGAAAACAATATCAGCCATCTTCTTGATCTTCTCTTCCAGTTTTT
+CCTTGTCCTCGTACAGCGCCGCCGTCGTGTTACCCTTCGGATCGTTTTTGATTTCCTTCT
+CGATCTGACGTAGCTGACTGGTTAAACGGATACGTTCCTTTTTGCGCTCTTCATTCAGTC
+TCTGAATCTCTGCGCGCATAGGTGTAATCGCCGCTGAGTTAATCGCGGATTTACCTGTTG
+ATGGTGGCTGGCTAATCACCATATAAAGAGCGGTCGGTTGTTCTTCTCCGTGATATTGCA
+CCCAAAACTTCCCTAGCATCGCGGCTGAGATGCACCCAATGAAATGAGCGTACGCAGACG
+AAACAGGAAACTGTACAGACTCAGCTTTTGCTTTTGCATATTCGAATACCAGGTTATCGC
+CACCTAACGAAATCAGCGGGAACTTATCGTTTCCACTGTTGATATCGATCGGGTCTTGCC
+AAAACGAAACTGAATCCCCGTAGCTGTTTTCACGAATTGCGATCGCCACCGGATTAACTC
+CAGTGCTATTTGCGATCTCAATAATCTGTTGGTAATTCAGTTTTGGCTTAATATTAAACA
+TCACAATAAACTCCTTAGTTGACGGCGTGAATGATACACCGTCAATGGTACACGCGTTTT
+GCAAAAAGTGCTATTCGATCCGTGATTCTCATGAAAACCGTATAGCCTTTCCGCTTCCTT
+CCTTGCTTTAGCCGCTGCATCCAGCGTCATGAAGGTCCCTAAGTGTTTTGTCTTTTTATT
+TATGGTTATGTTTGCCGTGTATCTATTTGTTTTTTTATTCCAATACACTCCCATAACTCC
+TGTGTTTGATGACACTACTCCCTTGTTTCTTAGGTTGTCTTGCCTAGTTACTAGCCTGAG
+ATTCTCAATTCTGTTATCGTCTCTTTCATGGTTTATGTGGTCTACATCCATTCCTTCTGG
+TATGTTTCCGTAGTGTATTTTCCATACTATTCGATGGGCGTATTCAAATACACCTCCTGG
+AAAACATATTGACCGATAACCTTTTTTATTAACAGTTCCGGCAAGTAGATTTCGTCTTCT
+TCCGATTCTCCACTCTTTCCAGTACAGTCTCCCGCTAACGTATTTCAGTTCATCTTTCAT
+AAGTATTTACTCTCAAAGGTTGTGCCGTCTGCGATACTAAAACCAACCTCTTCGCGGAAT
+AAGGTCCAGCGGCAACCGTCCTCATCAAAGATGTAACCAGCTACGCCTCCAAGAGCGCGA
+CCGCTTTCTACCTGGTAACGCTTTCCAACCTTGAATGATTTTTTCATTGGGTTGCGATGG
+TCAAGTCCGGTGCACTTGAGCGTCTTTGTTTTCAACTCGGTGAACTTCGCGAAGAAGATT
+TCATCAGATCCGTGAATATGTAATTCATCATGCTTTTCAAGTTTTAAATACTTCCCGCAT
+TTTAACTTTACTTCACGAGTATCATCATAGCGCTCACGACCTTTGTACAGATTGTTTACT
+TCGAATCCTGTGATTTTGTCTGCGGAAGTGCATTTAAGTTTGATTGATTTCATTGTGTTC
+GCTCCTGATTGGTTATCTTGAATAAGGCCACTTTATCAAATGACCTTACGGCAATATTAA
+CAAATCGTGCTATTTACCAGGGAATATAATCGTCAACGTCCTCATTAATATCATCACCGG
+GTAACTCAAAGTTAGGATCGTATTCGCTTTCAGCATCCATATCCATATCACCAAGAGCCT
+CGTCAAGTGTGATTTCTTTATATGCTACCTTGATGCACCATTCGGAACTAAGTCCGGCAT
+CAAGCGCTGCAAAATAACGAGTCCAGAAATTATCTTGTTCCATTAATTAGCCCCACGAAT
+TAATGTAAATCGAGTTAGCCTCAAGCGTTGCACGAACATCCGCATCTGTTGCATTTATCA
+GTCGAGAGCCTGGCACGCTTCCGATAACACTATTTCCGTTACGCGTCTTAGTTACCGTCA
+TTGAAATAAAGCCTGAAGACTTATCCATCTTGATAAATACACGGCCTTTTGCGTTCAGGT
+GCTTGATGATATTCTCAACTTTAATGCTCATTTTTGATTCCTTTGTTTTGTTTGGTATGG
+GAGTAATATACCTTACTCCCCGATGTGTGTCTTTAGCAATTTGTGCTATCAGCACTCAGA
+AATTACTTTACACTCTTTGATCGTGCCGCCGAGAACTACCTTTTGCATCATTGCTTTTTG
+GCGACTATCGTAAACGCGAACATTTTTTACGCTGTTAAACTTTCCTGTATAGAAAGTGTG
+AACATAAATCATTTTTTACCGTCCCATGTTTGATATGCCGTAACCAGGTTAATGCTATGG
+TCGAACTCGTCAACGCTAAGCGATCCGATTCTAACAAGCTCTTTGTTGTGCTTAATGTCA
+ACCATCCGGCTATATGGATGACCTGGCGCGTATTTAAATAGACGCTTGCTGTCGGCAATG
+TCTGCGTTAATTCCGTAGAATCGGCCTACTGCCGCCTTAACGCGCTCCATGATGTTTTTT
+TCGTGGATTGCCGCCGCGTTATTTAGTGCCGCAACCGTACTTCCGTTAATCTTCGGTGAC
+TTCGTTGTGATTTTGTAAGTGTGGTTCATTATTCCCCCTAACCAGTCAGGATAGCTTTCT
+AGCTTCATCAATGATAACAGGTTCGAGACGCTCAAGTCGAACGCGGAACGTGTTCATAAA
+TCCCTCGTTATTCTTCAATAACTCGAAGGCGTCCGTAGCTTCAATGGCGCTACTACCCAT
+GTAAGCGATTACGCGCTTCTGTGTTTTTCGTGAGATCGCCACTACGCGGTAAATATTATC
+ACTCATACAATGCCTCCTTAACACGATCCGCTACGAATGAGTTAACGGTGATTGAATAGT
+CAGCCACGAAGCAAGGAGCAACACTGCCTGGCTCAGTGCGCATGAATGGTTTATTGAACT
+GAGTGATTACCTCGCCAGTATCGTTATCAATTAAGCGAGACGCGCCTAAGCGGTCCACCA
+CGCGCGTTACGCGGTCCACGCACTCCATTACATCACTTACGTCATAATAGGCGGTTAGAG
+CGGCTATCATTGCCTTCTCTTGCTTGTTGTGAATCTTACGCGCAATGTTCTCTATCGCAA
+CGTGCACTTTCTCGTCAGTCTGGATTACCGAGCTGGCTGAGAAGTCCAGATCTGTGAACT
+GCTTAAACATAATACTTTCCTCGTTTACTGTTGATGTGATGAATCATACCCAGTAGACAT
+GGATAAGTCGTTAGCAAAAAATGCTATTCCAATAATTGCTCATTATTCCATCAATACTGG
+AATATCTGGAATAATCACGATCATGATTGATCTCTAATGATGAGATGTGATTGTTGCATG
+GTGTGCAACTGTTGATGTGATTGTTGCTTAGAATGCAATGATTGTGAGAGGGGGGATCTA
+GTGTTACCAGGTTCGCCTGGTAGTCATCTCCATTTTTAGCAAAAAGTGCTATCGATTACG
+ATTACGCTTGATTGCGTGTTAATCATTGTATATGATTTGCCTAAATCGCAATCGTATACA
+AAATTACGCCAAATTACGCCTTATAATATACATATAGATATATGTATATATATAATATAT
+AAGTGTTTTTTTTATTTATATATATAGATACTATTATTTGTAATATGGTTGTATAGCGTG
+TCTTTTATGTTAATCGTTGGATGCTCTGATTATGATGCTCATGTTTATATATACAGTAGT
+TTTATCGGCGCATATTTATTTAGGGAAATCGATTAACACGATTAACACGATTAACCACAC
+TTAAAATCGCCCCGCAGACCTTGACACATGCGGGTCGTCGTGTAACACTACGATTAGAGA
+CACGGTGTTACATGTTAATCATGGTGTATATTGAAACTAAGGAGAAAGCTATGAGTTCTT
+ACCAGTCAGACGCAGTACAGGCAGCAATCAAGGCAGCTTACGAGAAGGCCGGAGTAACGG
+TTGAGCAGCGACCGGAGGCAAAAGTGACCGATGTTATCCGGGCCGCCTGCGATCAGCTTT
+ATGGTGATGGCGAGAATACCGAGTTCACATTCGACGCGAATAAGATGGCTGAGGCCGCAG
+CAAGAAAGTCGATGCCAGACGCTGACGAACATGATGTTGCCAAAGGCGCCGAGTCCTGGT
+TGCTCGGGAAGACGGATGAGATTAACGAGAAGTTTAAATCCTCATTCATCACCCCGATCG
+TTTCTCGACACTTCTCCAAGATCGGCAAGTCGGTCAAGGTGAGCGTGACCATGAACGATG
+AGAAGTTGCGAGTCGTTACTATCTCAGTGAGTGACGAAGAAGTTCCGGTGAAGAAGCGCC
+GCAGCCGGAAAAAAGTCAGCCTGGCTGATTGTCTGGATTCGTTTGTTCCTGATGTTGATG
+ATCTTGAGAAAGGCGACGTTACTGTAAGCACCGTGCGCGACCTGGTTCGCCAGATGAAAG
+CGCATATCGAAAAATGTGGACTGTAAGGAGAAGTAATTATGTTTAATATCAAACCATTAA
+CAGAAGCAGAGAAACAGGCTCAGGCCAAGCAAACCGAAAACATCCAAGTGATCGCTGATG
+CGCTGATTGGTAAGAGGTCAATCAAAATAAACCTCGACACTGTTGGTCAGTCATTTTTTA
+CTAAAGGTTTGGATAAGTACGTTATAAATGTGAAGGCGAGAGACCTGGTGGCGAGAATTC
+AAAAGCTAAACAATCAAAAGCTAAAGCTCATCAAGGTCGAAGGCAACATGTGCGAAATTG
+AGAACCTCAGCGCACCAGACCCGAATAAGTGGGAAATCACCGATGTCGAGTTTATCGTAG
+AATAGCACTTTTTGTTAAAACCGGATCGGGGTATCTTGCTATAGTTACCCCATCAAAACG
+AGATACCAATCAGAGGAATCACCATGTCAATCGTCAAGAACCAGCAAGCCATCGATTCAA
+CCAATAACAACCGCTTTGCTATTTTCATCACTCGCGACAACAAGCGCTTTGCAGTAAAGG
+CCGTACCAGGTGGATACAAAACCTACATGGAAGATAACGGAAAGTGGGTGCGGTGCGACA
+ACCTCGCAAACTTCTTGGTCTGGAACGCAGACCTGCAGGGATTCGATGACATCAGCACTT
+TAATTGAGGAGTAATAATCATGCCACGTTACAGCAACCTAACTCAACTAACCCGCGTCAA
+CGGGCACATGATCCCGGCAAAATCCACTCACTACGCAATGGGAGCAAAGCACGGATTGTA
+TTTCAAATGGCGCGGTCAATGGAACTTCACGGTTATTCGTAATTTCTACATTAGAGTTAC
+AGGTGATGACCCGCAATCGGTCGTAGAGAACTCAATCGGCGACAACAAGATCGAGGTGCT
+GAAATGAACTTCAACATAATTGCTTTCTGGTCTGCCGTATGGTTCTTTTGCGTAGGTCAT
+GTTGTGGTTGGAATCGTAATCATGTTGCTACTGTGTGCGGGAGCGTTCGAATGATGCGGA
+TTCTGATTTGCATGATGGCGGCGGTCGCCATGGCTATCCTGGTAGTGTCCGGCTGCGGCG
+AGGCCAGGGATAGCTGTCATGAAACCGGGAGCCAGGTTACTACTTTCGTGATGGTTGGCA
+ACGTATTGCTACCAATAACATCAAATGAAATCACTTGCGAATAGCACTTTTTGTTAAAAC
+TCAACTCCGGGGTTGCGATATAGTAACCCCATCGACAACGAACGAGGACGCAAACATGAA
+AATTAAATTACTTAGCAATGGCGGTTACAAGGGATTCACCCGCGACCTGGAAGCTGACCC
+TATCGTGGTTGACGCGGTTAAGTGCGACTCAAGTACTGGAGGCTACCGCGTTAAGGTTGA
+TGACCTTGTAAAAGCTGGCGTGTACGATCTTGATTATGGCCTGTCGGTTAGCCCGGTATT
+TGGCCCAGCTGACTTCAACGAGAAAGACGGAACGATGTTCTTTTTTGATTGGGAAGTGAA
+GGCAAACATCAAGCCGCGCAAGGTTCGTCTTCTCAGCAATGGCGGCTACCCGATGCGACC
+AGGTTATGAGAATCGCACGTTCCCGGTTATCGTTGACTTCATTGGGACAACTGACAACTT
+GGTATACGTTAGCCATGAGCAACTTAAGGCAGTTGGATTCGTTGGCGGTATGAATAAAGA
+AGCGCTTTGCTTCTTCCATCGATGTCCAGAGCCGATCGGTATTGAGTGCGAGTTAGTATA
+CTAAGCACGAATTGTTAAAAGGGGATTTGGCCTGACTGGTATAATCCCCACATCAACCAC
+TAATAGGAAAGCATCATGTTAAAATTAAAAGATATTCAGTTCCCTGTAGTATTTAACACT
+ATTAGCTGCGGTAAAATAACCTGCCACAGCAAAGATCGCGCAACAGATTCATCATTCAAT
+GAGTGCCACCCGTCTATTGTTGGTAATCTTATTGAGCTTCATAACAATCACAATCCTGAT
+AACATCCCATCTCTTCCATATTATGTTGAGGGAGTCGGGCCTGGTTGGAAGGTTGGTCGC
+TCCATCTTCCATGCAGCAAAGCCAGAAATCAAGCCAGCGCTACAATGCACTCAGATCGAG
+AACATGCCATTGAGCGCGACACTAAAAGGTGTCCAACTTGATAGCGAATCCTGGATCGAG
+ATTACCGCCACGCCTAAAACTATTGAGGTACATGATGATGTGGTGATTCTCCTGTTGCAT
+TACGGCAGCTTTAAGCACAAGACGGTATCAGGTGAAATCAGCATTAAGCGCGGAACTCTT
+GTCCGCTACGAGGTGAAATAATGACTGCATGGGTCTTGATTATCTTGATGAGCAAAGGTC
+CGGATCACGTATACATGGAAAGTCAACAATCATGCAACAAGGCACGGGAAGTTATCGCAG
+AGAACAAGCCGTTCGGATATGAAGTAAAAACTATGTGCGTTAAACGATAGCACGAATTGC
+TAAACCTTCCGCAAGGCCATTTGATATAGTGGCCTTATTGAAGCACGACAACCAACTGGA
+GGTAAAATTTATGAAATTCGAATGTATCAGCGATAACACCAAAAAATTTACTGTTGGCAA
+AATTTACGATGTTCCGACTGAGCACGCAGAGCAAACCGTAGCTCTGACCGACGACACGGG
+CCGCAACCGAATTGCAACCGTAACTCACAACGGTGAAGGTCTTCGCTGGAATAGCGGCGG
+CACTAAGTTCGCAACGTTCGGCAAGAAGCGCAAGCGCACCTTCCGCGTCAACGGCAATGT
+TGCAGCTAACAAGATCCATAACGTCAAGCCGTCGGAAGTTGACCGCAAGCCAGCGCTGAA
+GTTTAAAGAGAAGGTGGATTTATTCAATCTTGCCGCCTCGCTTGTTCTCCTGGTCGCTGC
+TATTTCGCTGCTTTCCATCATGTAATGCTAATGGGGAATCGACTCTGAACGGTTCCCCTT
+TCTTTTGGAGAAAACACTATGCCAGACTTTTCTAACTGGAATAACGAGCCGCCATCATTT
+CAGGAGTTGCTATTCTGCCTCCTGGTCCTGACATTATCTCTTAAGGGTGTTTTATGGCTA
+CTATCATGACAGTAGAAGATGCAGCACGCGATGCAGTGGAAGGAATGCGCCCAAATACCT
+CCAGAATAGCACACTACTACAAATCTGAGGTGTCGGCAGTGCAATTGGTCCACGAAATTT
+TAAGGCTCCCACAAGTCGATTCAGCGCGCGTTGTGACGTGCTTAAAAAATTATTTTTGCA
+TCACTATTAAAACGAATAGCACGAATTGCTAAAACCTATCAAGGGGAATACGCTATGATT
+CCCCTACACCAACAAACGAGGAAGCGATCATGAAACACTTAATCTGCATTGAAGCGCCTA
+ACGATCAATACACCCTGCATGGACTTGGTGTGTTCAAAGGTCACTACATTACCGCAGGAA
+CTTACGATGCTCGTCGCGGCGATGGCGACCTAATGATTACGTCAAAAGAAGTAAATCCGT
+ACATCATGCAGAATCTTGGCAATAACGAATATATGGCCTATGGCTGCAACGCGGTGTACA
+AGCACGTTAAGATCCGCAAGCGTGTTGTGCGTGCATTCAAGAAGATTGCAATTAAATACT
+GGAAAATGAGCAAGAAAGATGCCGGACGTTGGGCGCGCAACGTTGCAGATTCATACTTCT
+ATCGTAACGGCGAATCCTGCTACTTCCTGATCGATGAACTTATGGAAAACTACGGTGGCG
+ACTTCAGCCAGGGTAGCTTTGATGACTGGGCCAACTATGAGATCAGTTGCTGGTAATAGC
+ACGAATTGCTAAAACTTGCTCAAGGGCATTTGTTAGAATGCCCTTCGTTGAGTTAAGCAA
+CCAATCAGAGGAATAAATCATGGATAAAATCACAATTTGGGGCCAGACAATCAACCTGTT
+TCTCGGCACGCGCCGAGTGGCAATCTTTGACTTTGACGGGACACTTAGCGATGGATCTGG
+TCGACTTCACCTGCTGCCAACAAAGGATTTGCACTTGACTGAAAGCTGGTCTGAGTTTAA
+CCGAGCGGCAATATTTGATAACCCAATCCAGAGCACGATCGATGTGATGAACTCTATGTT
+TGCCGCTGGGTATCATGTGATCATTTTAACCGGGCGAAGTGATGAGGTGCGTTACGCATC
+TGAGTTATGGCTTAAGCATCACGGCGCTCGATATGATTACTTAGTCATGCGACCGCATAC
+CGACAACCGCAAAGACACGGTAATGAAAGAAGAGGCAGTGCGCGCTATCGGCATTGATAA
+CATTCTTGCGGCTTGGGATGACTCAGTGAATATAATAAAAAAATTCAGAGATCTAGGGAT
+AACCACATATCAGGTTTGTGAATATGCCTGTGATAGTCGAGAGGATTTAAATAGTCATGG
+TGTCGATTGATAACAAATCAATGGTAAGAGAGTTATTTACTTATTCTGACGGCGTTCTGT
+ATTGGAAGGCCAAATCATCTAAATACAGTAGAGCTAAAATAGGAGGCGCGGCAGGAAGCA
+AGGATAAAGACGGATACATAATAATCAGAGTAAGAAACGAAACTAGAGGCGCTCACAGGC
+TTGTATGGATATACCATAATGGCAAGATACCTGATGGAATGGAGGTAGACCATATGGATG
+GAGACATAACAAACAATAGAATAGAAAACCTAAGATTGGTAACGAGAACCATAAATAACA
+GGAATCAAAAAAAGAGATCTGATAACACAACCGGAGTATCCGGTGTAACTTTCATGAAAG
+ATAGAGGAAAGTATAGGGCGCAAGTTAGAAACAAGAGACTCGGGCAGTTCGACACAATAG
+AAGAGGCCGCCAAAGCAGTAAAGGATGAGCGGGATAGATTAGGTTTATTCACAAAAAGAC
+ACGGGGTGTAAACATGAAAACAGCTATCATTTTAAACGGCGCACCTGGTGCGGGAAAGGA
+CACTATCGGATGCATCCTGGCTGACACTTACGATCATGTAGCGCTACGCAGCTTCAAAGC
+GCCAATGTTTGAGATTGCCCGAGCAATCCTGGGTGAGACTAATTTCGAGTATTTCATGTT
+CTTGTATGAGGACCGTCGCTATAAAGAAGAGCCAGCGTCAATCCTGAACGGTAAAAGCCC
+GCGCCAGTTTATGATCTGGATTAGCGAGGAGGTCATCAAGCCGCAGTTCGGAAATCGCTT
+CTTCGGTATGCGAGCGGAAAGTAAGGTGAAAGAGTCGCATTCACTTTCGGTATTTACTGA
+CGGTGGATTCAAAGACGAGATCTTGCAGATGATTGAAGGTGACATCCAGGTCAAGCTGTG
+TCGAATCCATCGCAACGGTTGCAACTTTGACAACGACAGTCGCGACTATATCTATCTTGA
+CGATATGATCGGGGTCAACGGTTATCAGGAGTGTGACTTCTTTTCTGTCGAAGGCCATCC
+AGAAATTACCGCTCAGCACATAGCCGCCACGTTCATCAATAAATAGCACGAATTGCTAAA
+ACGTCGGTGTGGTGATTTGATATAGTTACCTCATCGACAACGAAGAGAGAAAATCGAAAT
+GATGGTATCAACTGATAAGTTTTTCACTTGCACCAAAACTTCTGAGGTATTCGAGCTGGT
+TCATACTGACAACGGTGATTTCATGCATGACGGTTGCGACGCTTTCATTGAAGTGAAAGA
+AAGCGACTATGACGACGGAGTTTATTACAACCCTGCGGTTAACACGCAGTTTTTTACCCC
+GATCGAAGAGGAAGGAGAAGAGGCATGATCACGATTAACCTGTCAGATAAACAAGCGCGT
+GAAATACTAGACACTATCGGAGAACAGCTTCACGTAAAAGGCGATACCGCTGAGATTCTT
+AACCAGATCGAAAGACAGCTAACCCCTGTGTCGACGAATCAAGCTGAGTTCGCAGCATGG
+AAAAGCGAACGCATCCTGCCAAATATCATCAAGGCATGGAAGCGCAAGCATAAAAAAGAA
+ATCAACGTTGAGGATTTATTTACCGATGAATTAAGTCCTTCAAATGTTGCTCAATACCAG
+TTGCGATACATGGAGTCGGTTTGCAATCAGGTTTTAGGTGTAAGTTTTTCATTCAAAGGT
+GATAAATAATGTTCGGTTTAAGCGAAGCGGAGTGGAATGTTGTAAAGCGTGCCGCGAAAG
+AATTAAACAAATTCGTCAGCGGAATGAAGAAAGAAGATCGGAAAAACGACAAGATTATGA
+TTGACGTAATTTCGACTCACCATAAAAAGGTCGAACTACTCATTGACCGCTACAAATTTG
+TCTGGACTGCCGGGTATATTGCAGGGCGCGTAGGTAACAAAGAGGGGGATTATGAATAAT
+GGCCAATTTACCAAAGAAAGGCGATCAGGTTCGATGTGTCACTTCACGCAATGGTAATGC
+TTTATCGGCGGGGTGCTTGTACGACGTAGAAAAAGTCAGTAAGTCAAAGAGGCTTGTATT
+CGTGTACGGCGACGATGGAAATCTGCATGAGATTGATTACCCGCAGGATGTAACTAATGG
+TCAATTCGAAATTAATGATTGACCTAAATCCCTGAGCGGTGATAGTATTAATCCCGTAGA
+CAGACGAGGCGCAACTAAGCGCAACGCGTGAGACGATTCTCACACTTCCAGCTAACAAGC
+TCGGTTGCATAGTGGTTAAGCAACGCCGCAGACCCGTAAGCGGCAACAATTCAAGAGGAT
+TGCATAATGCAAAAAACTAAAGACGAATCAGTCAAAATTGAAATTAAAGTAACTCGCAAC
+GGTGAAACCACTCGTTATAAAAAACGATTAAATCCTGGCGAGGCTGTTATTGGTCGCATT
+GCTGGCGTTATGATTAAGGCGCAGGAAGATGAAGCGATTCAAAGTTAAATTAATTATTCG
+AAAGATGGGAATGTTTTGCCAGTCGTGCAAGCAATCTTTCGAAGCTGAATTATCAGCAAC
+CAGTCAGGATGAAGCCATCACGAAAGCAAAAAAACTTTCCGGCGCTAACCTTGACACTCA
+CAAAATAAATATTGAATTAATCAAGGAGATTTAACATGACAATTTTTTTATTAATTATCG
+CTGGTGTCATTATTTTTGGTGCTGGTTTGTTTGCTGGCTTCGCACTTGTGGCGGCAGCAA
+TTGCGATGGACGCGAAGGATAAAACTGGTGTATGGCTGACCTACTCACCTAAGAAGGACC
+AATGGGAAATGACTGGCGACCTTGCTCACTGCTATTCTAAAGCTAAGACCCACCCTAAAG
+GCATTAAACGACGATTGTCGTGATGAACACTAACCCGCTCCGGCGGGTTTTTTTAT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_phageqc_annotation/test-data/AY216660.gff3	Fri Jun 17 13:00:50 2022 +0000
@@ -0,0 +1,315 @@
+##gff-version 3
+##sequence-region AY216660.2 1 48836
+AY216660.2	GbkToGff	gene	40	576	.	+	.	locus_tag=CPT-T1_001;ID=CPT-T1_001.gene;
+AY216660.2	GbkToGff	mRNA	40	576	.	+	.	locus_tag=CPT-T1_001;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_001.mRNA;Parent=CPT-T1_001.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	40	43	.	+	.	locus_tag=CPT-T1_001;regulatory_class=ribosome_binding_site;ID=CPT-T1_001.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_001.mRNA;
+AY216660.2	GbkToGff	CDS	52	576	.	+	0	locus_tag=CPT-T1_001;codon_start=1;transl_table=11;product=terminase small subunit;translation=MSEPKNAPVVQGGNFKELYKKKFGTVLAKNRAMTPEQLFDLSVKYFEWAEDNAIKASESASFQGGVYESLVHKPRVFTWTGYRLFIGASEAAIIKWKREEEYSEVMEFVESVINEQKFQLAANGVINASFIGKDLGIDKPASINIENSSASASTVVATTEDAMKEAVNSILDML;note=Orf no. 54 see PMID: 14972552;ID=CPT-T1_001.CDS.1;Parent=CPT-T1_001.mRNA;
+###
+AY216660.2	GbkToGff	gene	589	2184	.	+	.	locus_tag=CPT-T1_002;ID=CPT-T1_002.gene;
+AY216660.2	GbkToGff	mRNA	589	2184	.	+	.	locus_tag=CPT-T1_002;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_002.mRNA;Parent=CPT-T1_002.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	589	592	.	+	.	locus_tag=CPT-T1_002;regulatory_class=ribosome_binding_site;ID=CPT-T1_002.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_002.mRNA;
+AY216660.2	GbkToGff	CDS	601	2184	.	+	0	locus_tag=CPT-T1_002;codon_start=1;transl_table=11;product=terminase large subunit;translation=MGDLIMIQWEDLNATQKLAIKKMSEANFEKMIRIWFQLMQAQQFQPNWHHLYLCHEVEEIIAGRRGNTIFNVTPGSGKTEVFSIHLPVYAMLKCKKVRNLNVSFADSLVKRNSKRVREIISSNEFQELWPCKFGTSKDEEMQVLNEDGKVWFELISAAAGGRITGSRGGYMTPGFSGMVMLDDIDKPDDMFSKVKRERTHMLLKNTIRSRRMHNETPIIAIQQRLHAQDSTWFMMNGGMGIEFDQISIPALVTEEYGKTLPDWLQPYFERDVLSSEYVELDGVKHYSFWPSKESVHDLLALREADQYTFDSQYQQKPIALGGSVFNSEWWTYYGSSLDADEPDPGKYDYRFITADTAQKTGELNDYTVFCLWGKKNDKVYFIDGIRGKWEAPDMERQFTAFVNQAWRHNKSMGVLRKIYVEDKASGTGLIQNLRKKTPISITPLQRNKDKVTRAMDAQPVIKAGRVVLPEEHPMLAEIIAEHSAFTYDDTHPHDDIVDNFMDAANIELLTIDDPIERMKRLAGMVKR;note=Orf no. 53 see PMID: 14972552;ID=CPT-T1_002.CDS.1;Parent=CPT-T1_002.mRNA;
+###
+AY216660.2	GbkToGff	gene	2230	3522	.	+	.	locus_tag=CPT-T1_003;ID=CPT-T1_003.gene;
+AY216660.2	GbkToGff	mRNA	2230	23794	.	+	.	locus_tag=CPT-T1_003;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_003.mRNA;Parent=CPT-T1_003.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	2230	2233	.	+	.	locus_tag=CPT-T1_003;regulatory_class=ribosome_binding_site;ID=CPT-T1_003.Shine_Dalgarno_seqeunce.2;Parent=CPT-T1_003.mRNA;
+AY216660.2	GbkToGff	CDS	2239	3522	.	+	0	locus_tag=CPT-T1_003;note=HHPred predicted structural similarity at 99%25 probability to phage T4 portal protein gp20 Protein Data Bank entry 3JA7 over most of protein%3B Orf no. 52 see PMID: 14972552;codon_start=1;transl_table=11;product=portal protein;translation=MKIVKHDGYNDIFNGGADGSPKPFFMSDASYHVGSFYNDNATAKRIVDVIPEEMVTAGFKMSGVKDEKEFKSLWDSYKLDSSLVDLLCWARLYGGAAMVAIIKDNRMLTSQAKPGAKLEGVRVYDRFAITVEKRVTNARSPRYGEPEIYKVSPGDNMQPYLIHHSRVFIADGERVAQQARKQNQGWGASVLNKSLIDAICDYDYCESLATQILRRKQQAVWKVKGLAEMCDDDDAQYAARLRLAQVDDNSGVGRAIGIDAETEEYDVLNSDISGVPEFLSSKMDRIVSLSGIHEIIIKNKNVGGVSASQNTALETFYKLVDRKREEDYRPLLEFLLPFIVDEEEWSIEFEPLSVPSKKEESEITKNNVESVTKAITEQIIDLEEARDTLRSIAPEFKLKDGNNINIREPEETTEPEPGLGEKLEDEN;ID=CPT-T1_003.CDS.1;Parent=CPT-T1_003.mRNA;
+AY216660.2	GbkToGff	gene	3496	4273	.	+	.	locus_tag=CPT-T1_004;ID=CPT-T1_004.gene;
+AY216660.2	GbkToGff	mRNA	3496	4273	.	+	.	locus_tag=CPT-T1_004;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_004.mRNA;Parent=CPT-T1_004.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	3496	3499	.	+	.	locus_tag=CPT-T1_004;regulatory_class=ribosome_binding_site;ID=CPT-T1_004.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_004.mRNA;
+AY216660.2	GbkToGff	CDS	3512	4273	.	+	0	locus_tag=CPT-T1_004;note=InterPro domain IPR006528,Orf no. 51 see PMID: 14972552;codon_start=1;transl_table=11;product=capsid morphogenesis protein;translation=MKINGVATQWRYPEMSERAMSRSLQDVAAKLTEKMRDELKPMKFDATDEEIDQTERSLLDYVESLIAPIIGSLSSVALTIYKFNSKQWLRIARNAGGKKNQAVMLLALIGPTAAESWYSGQYNLWRSQVATSIRKFAANMVTDFTDKLRAASGQGKSKDFVVELAKERFGIYRNWAKNRASGIVGTWNSRLMRQRIKDAGVSYYFWRGVMDLREREKHVRWEGKRIAVDSDHVFPGEEYNCRCWAVPDFSTGD;ID=CPT-T1_004.CDS.1;Parent=CPT-T1_004.mRNA;
+AY216660.2	GbkToGff	gene	4264	5388	.	+	.	locus_tag=CPT-T1_005;ID=CPT-T1_005.gene;
+AY216660.2	GbkToGff	mRNA	4264	40736	.	+	.	locus_tag=CPT-T1_005;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_005.mRNA;Parent=CPT-T1_005.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	4264	4268	.	+	.	locus_tag=CPT-T1_005;regulatory_class=ribosome_binding_site;ID=CPT-T1_005.Shine_Dalgarno_seqeunce.2;Parent=CPT-T1_005.mRNA;
+AY216660.2	GbkToGff	CDS	4276	5388	.	+	0	locus_tag=CPT-T1_005;note=HHPred predicted structural similarity at 72%25 probability to phage T4 prohead core protein protease gp21 Protein Data Bank entry 5JBL over predicted catalytic third of protein%3B Orf no. 50 see PMID: 14972552;codon_start=1;transl_table=11;product=capsid maturation protease;translation=MKAKQRFDSVKIKAHFDDNGFLVDRPIVARIGAQVYKTPHGDRVEFRPASEVFKQDSLQSFAGKPITVGHVTVTPQNAKDVVVGSCAGAGIASGVGVEVPLSIYSDYAISKAKAKEAGELSVGYTSVDIDKPGWGSNETGEYIFEEDMKQDEAPPEGWVRFDAVQTNIKVNHIALVFKGRAGIAKLNLDAEQEFPYDNNVQLTNEDKQMKKIKIDSVDVEVTEDVANHIEKLTAQIATIQGKADGFEAERDALKVKVDSLPELVKAEVEKQKADAAARAEVTAVAETAGVKHDGLDIKDVKIAVVKAMLDKDVSEKSDAYIDAMFDVAKDSDIMAIQRKAVKGDSIEGGKPEEKNDAAPVTPNSRLSKVM;ID=CPT-T1_005.CDS.1;Parent=CPT-T1_005.mRNA;
+AY216660.2	GbkToGff	gene	5389	5876	.	+	.	locus_tag=CPT-T1_006;ID=CPT-T1_006.gene;
+AY216660.2	GbkToGff	mRNA	5389	5876	.	+	.	locus_tag=CPT-T1_006;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_006.mRNA;Parent=CPT-T1_006.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	5389	5392	.	+	.	locus_tag=CPT-T1_006;regulatory_class=ribosome_binding_site;ID=CPT-T1_006.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_006.mRNA;
+AY216660.2	GbkToGff	CDS	5400	5876	.	+	0	locus_tag=CPT-T1_006;note=HHPred predicted structural similarity at 92%25 probability to phage TW1 Protein Data Bank Entry 5WK1 capsid stabilizing protein%2C equivalent to phage lambda gpD dec protein%2C over most of protein%3B Orf no. 49 see PMID: 14972552;codon_start=1;transl_table=11;product=capsid decoration protein;translation=MAQINASYQRDMAIALPGMVADTSKYNIDGACVVNEGDVLVGAAVQVVQAQAVDGHKLVKALTTGTTPYGVAIRSHWQTVNAQNQMIYEDGGAINVMTSGRVWMLSKSTEAPTFGSAVKLDVDGQEKSDGTIETTWTYAGGWTKYKDIQLVEVQLHQL;ID=CPT-T1_006.CDS.1;Parent=CPT-T1_006.mRNA;
+AY216660.2	GbkToGff	gene	5926	6705	.	+	.	locus_tag=CPT-T1_007;ID=CPT-T1_007.gene;
+AY216660.2	GbkToGff	mRNA	5926	6705	.	+	.	locus_tag=CPT-T1_007;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_007.mRNA;Parent=CPT-T1_007.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	5926	5929	.	+	.	locus_tag=CPT-T1_007;regulatory_class=ribosome_binding_site;ID=CPT-T1_007.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_007.mRNA;
+AY216660.2	GbkToGff	CDS	5938	6705	.	+	0	locus_tag=CPT-T1_007;note=InterPro domains IPR008964 and IPR003343%2C invasin/intimin cell-adhesion fragments superfamily%3B bacterial Ig-like domain-containing protein%3B Orf no. 48 see PMID: 14972552;codon_start=1;transl_table=11;product=hypothetical protein;translation=MAYENLMLRPACPGNLSDTSTYNIDGACVAQGDIEFGSAVQVVGIVDGVKVVTALSDGGTPYGIAFRSQYEHLSGKILDGEVCNVVSHGRVWALTSLDEAPSLFSKLQFGSGGVVTGGSGYAGWTFAGGFVKHEDGYIIEVRVKQNAFIVPPPPPPVVLVESATITTDKESPQPNNVTIQCVANALPANATDKTGKWSIDATNIATVNPDSGLVTPVGGEVVGDFNITWTANDASKTTATIAYRVEAVPTPEVDV;ID=CPT-T1_007.CDS.1;Parent=CPT-T1_007.mRNA;
+AY216660.2	GbkToGff	gene	6784	7755	.	+	.	locus_tag=CPT-T1_008;ID=CPT-T1_008.gene;
+AY216660.2	GbkToGff	mRNA	6784	7755	.	+	.	locus_tag=CPT-T1_008;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_008.mRNA;Parent=CPT-T1_008.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	6784	6787	.	+	.	locus_tag=CPT-T1_008;regulatory_class=ribosome_binding_site;ID=CPT-T1_008.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_008.mRNA;
+AY216660.2	GbkToGff	CDS	6796	7755	.	+	0	locus_tag=CPT-T1_008;codon_start=1;transl_table=11;product=major capsid protein;translation=MTTKKFDEADKSNVEMYLIQAGVKQDAAATMGIWTAQELHRIKSQSYEEDYPVGSALRVFPVTTELSPTDKTFEYMTFDKVGTAQIIADYTDDLPLVDALGTSEFGKVFRLGNAYLISIDEIKAGQATGRPLSTRKASACQLAHDQLVNRLVFKGSAPHKIVSVFNHPNITKITSGKWIDVSTMKPETAEAELTQAIETIETITRGQHRATNILIPPSMRKVLAIRMPETTMSYLDYFKSQNSGIEIDSIAELEDIDGAGTKGVLVYEKNPMNMSIEIPEAFNMLPAQPKDLHFKVPCTSKCTGLTIYRPMTIVLITGV;note=Orf no. 47 see PMID: 14972552;ID=CPT-T1_008.CDS.1;Parent=CPT-T1_008.mRNA;
+AY216660.2	GbkToGff	gene	7794	8093	.	+	.	locus_tag=CPT-T1_009;ID=CPT-T1_009.gene;
+AY216660.2	GbkToGff	mRNA	7794	8093	.	+	.	locus_tag=CPT-T1_009;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_009.mRNA;Parent=CPT-T1_009.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	7794	7797	.	+	.	locus_tag=CPT-T1_009;regulatory_class=ribosome_binding_site;ID=CPT-T1_009.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_009.mRNA;
+AY216660.2	GbkToGff	CDS	7806	8093	.	+	0	locus_tag=CPT-T1_009;codon_start=1;transl_table=11;product=hypothetical protein;translation=MAKEKTVVIVNVGVALQMFRLEDGSFAKVLPDEEVTLPASVLDLPGLRCLIAREEIEVKDDSATNRKIRAEMAKITKPDPWDKMSVKELEDGGEY;note=Orf no. 46 see PMID: 14972552;ID=CPT-T1_009.CDS.1;Parent=CPT-T1_009.mRNA;
+AY216660.2	GbkToGff	gene	8127	8548	.	+	.	locus_tag=CPT-T1_010;ID=CPT-T1_010.gene;
+AY216660.2	GbkToGff	mRNA	8127	8548	.	+	.	locus_tag=CPT-T1_010;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_010.mRNA;Parent=CPT-T1_010.gene;
+AY216660.2	GbkToGff	CDS	8138	8548	.	+	0	locus_tag=CPT-T1_010;note=HHPred predicted structural similarity at 96%25 probability to Bsubtilis yqbG (myophage protein%2C see PMID 29279385) Protein Data Bank entry 1ZTS over most of protein%3B Orf no. 45 see PMID: 14972552;codon_start=1;transl_table=11;product=head-to-tail connector complex protein;translation=MNQETLIAVVEQMRKLVPALRKVPDETLYAWVEMAELFVCQKTFKDAYVKALALYALHLAFLDGALKGEDEDLESYSRRVTSFSLSGEFSQTFGEVTKNQSGDMMLSTPWGKMFEQLKARRRGRFALMTGLRGGCH;ID=CPT-T1_010.CDS.1;Parent=CPT-T1_010.mRNA;
+AY216660.2	GbkToGff	gene	8534	8919	.	+	.	locus_tag=CPT-T1_011;ID=CPT-T1_011.gene;
+AY216660.2	GbkToGff	mRNA	8534	8919	.	+	.	locus_tag=CPT-T1_011;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_011.mRNA;Parent=CPT-T1_011.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	8534	8538	.	+	.	locus_tag=CPT-T1_011;regulatory_class=ribosome_binding_site;ID=CPT-T1_011.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_011.mRNA;
+AY216660.2	GbkToGff	CDS	8548	8919	.	+	0	locus_tag=CPT-T1_011;note=HHPred predicted structural similarity at 96%25 probability to phage SPP1 gp15 Protein Data Bank entry 5A21 over most of protein%3B Orf no. 44 see PMID: 14972552;codon_start=1;transl_table=11;product=head-to-tail connector complex protein;translation=MNYSQIERMARKGVAFFTDPSRPMNLIKQGEYGYDENGFEIPPMEQVIPISGATRRPNAREIDGETIRASDILGIFNNDHEINEGDYIEIDGIRHVVVDARPVQASLEPVAYRPVLRRVSVGG;ID=CPT-T1_011.CDS.1;Parent=CPT-T1_011.mRNA;
+AY216660.2	GbkToGff	gene	8897	9355	.	+	.	locus_tag=CPT-T1_012;ID=CPT-T1_012.gene;
+AY216660.2	GbkToGff	mRNA	8897	9355	.	+	.	locus_tag=CPT-T1_012;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_012.mRNA;Parent=CPT-T1_012.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	8897	8901	.	+	.	locus_tag=CPT-T1_012;regulatory_class=ribosome_binding_site;ID=CPT-T1_012.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_012.mRNA;
+AY216660.2	GbkToGff	CDS	8912	9355	.	+	0	locus_tag=CPT-T1_012;codon_start=1;transl_table=11;product=hypothetical protein;translation=MANYQIRRFQGEIDAWINAAESTLEHAIEIFVRDVHDALVSRSPVDTGRFKGNWQITFNEIPNHALNRYDKTGGVVRGEEQAKTYGMFSRGGAITSVHFSNMLIYANALEYGHSQQAPSGVVGLVALRLRSYMADAIKQARRQQNAL;note=Orf no. 43 see PMID: 14972552;ID=CPT-T1_012.CDS.1;Parent=CPT-T1_012.mRNA;
+AY216660.2	GbkToGff	gene	9332	9743	.	+	.	locus_tag=CPT-T1_013;ID=CPT-T1_013.gene;
+AY216660.2	GbkToGff	mRNA	9332	9743	.	+	.	locus_tag=CPT-T1_013;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_013.mRNA;Parent=CPT-T1_013.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	9332	9336	.	+	.	locus_tag=CPT-T1_013;regulatory_class=ribosome_binding_site;ID=CPT-T1_013.Shine_Dalgarno_seqeunce.2;Parent=CPT-T1_013.mRNA;
+AY216660.2	GbkToGff	CDS	9345	9743	.	+	0	locus_tag=CPT-T1_013;note=HHPred predicted structural similarity at 96%25 probability to phage lambda minor tail protein U Protein Data Bank entry 3FZ2 over most of protein%3B Orf no. 42 see PMID: 14972552;codon_start=1;transl_table=11;product=minor tail protein;translation=MHYELSAAARAAFLSKYRDFPHYMENRNFTPPKDGGMWLRFNYIEGDTLYLSIDRKCKSYIAIVQIGVVFPPGSGVDEARLKAKEIADFFKDGKMLNVGYIFEGAIVHQIVKHESGWMIPVRFTVRVDTKET;ID=CPT-T1_013.CDS.1;Parent=CPT-T1_013.mRNA;
+AY216660.2	GbkToGff	gene	9733	10414	.	+	.	locus_tag=CPT-T1_014;ID=CPT-T1_014.gene;
+AY216660.2	GbkToGff	mRNA	9733	10414	.	+	.	locus_tag=CPT-T1_014;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_014.mRNA;Parent=CPT-T1_014.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	9733	9737	.	+	.	locus_tag=CPT-T1_014;regulatory_class=ribosome_binding_site;ID=CPT-T1_014.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_014.mRNA;
+AY216660.2	GbkToGff	CDS	9746	10414	.	+	0	locus_tag=CPT-T1_014;note=HHPred predicted structural similarity at 99%25 probability to phage lambda major tail protein V Protein Data Bank entry 2K4 over half of protein%3B Orf no. 41 see PMID: 14972552;codon_start=1;transl_table=11;product=major tail protein;translation=MHLPNGAQIFVETSRGVEVEATAITNAENPVATVASKGDLAKGDYVIVTQSTWAKMVSRVLIVTDAQETSITLAGIDTSDTLVFPAGGTMSFAKITGWTEIPCVQEIGQDGGEQQYYTYQCLSDDKEQQIPTFKSAVSLTYTFAHEFDNPIYPILRKLDSSGQVTAVRMYVPKASEMRMWAGILSFNDIPSTQVNEMETVELAVSLKGDFTFISSTLASPGA;ID=CPT-T1_014.CDS.1;Parent=CPT-T1_014.mRNA;
+AY216660.2	GbkToGff	gene	10516	10845	.	+	.	locus_tag=CPT-T1_015;ID=CPT-T1_015.gene;
+AY216660.2	GbkToGff	mRNA	10516	10845	.	+	.	locus_tag=CPT-T1_015;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_015.mRNA;Parent=CPT-T1_015.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	10516	10520	.	+	.	locus_tag=CPT-T1_015;regulatory_class=ribosome_binding_site;ID=CPT-T1_015.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_015.mRNA;
+AY216660.2	GbkToGff	CDS	10528	10845	.	+	0	locus_tag=CPT-T1_015;codon_start=1;transl_table=11;product=tape measure chaperone frameshift product;translation=MAKFNFVLGQLPDFKLPVTFTMPNGEDATIIFTVRHLSSKEVQDMYAKQGEMNDSDFITKIASGWNLEEEFNEENTRKLVQYYPSAAYNLTATYIKALAGHRAKN;ID=CPT-T1_015.CDS.1;Parent=CPT-T1_015.mRNA;
+AY216660.2	GbkToGff	gene	10516	11162	.	+	.	locus_tag=CPT-T1_016;ID=CPT-T1_016.gene;
+AY216660.2	GbkToGff	mRNA	10516	11162	.	+	.	locus_tag=CPT-T1_016;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_016.mRNA;Parent=CPT-T1_016.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	10516	10520	.	+	.	locus_tag=CPT-T1_016;regulatory_class=ribosome_binding_site;ID=CPT-T1_016.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_016.mRNA;
+AY216660.2	GbkToGff	CDS	10528	10839	.	+	0	locus_tag=CPT-T1_016;codon_start=1;transl_table=11;product=tape measure chaperone frameshift product;translation=MAKFNFVLGQLPDFKLPVTFTMPNGEDATIIFTVRHLSSKEVQDMYAKQGEMNDSDFITKIASGWNLEEEFNEENTRKLVQYYPSAAYNLTATYIKALAGHRAKKLKRAVYLLYQKPPTEEQLRSVGLSLSDYEDEEPETIIGDAEMVKAWNVFTSMLTQWRSSGAGAYGLDYNVLPMLFKIYKIEDEELALQDVRIMEAKALEMIAKQNN;note=Orf no. 40 see PMID: 14972552;ID=CPT-T1_016.CDS.1;Parent=CPT-T1_016.mRNA;
+AY216660.2	GbkToGff	CDS	10839	11162	.	+	0	locus_tag=CPT-T1_016;codon_start=1;transl_table=11;product=tape measure chaperone frameshift product;translation=MAKFNFVLGQLPDFKLPVTFTMPNGEDATIIFTVRHLSSKEVQDMYAKQGEMNDSDFITKIASGWNLEEEFNEENTRKLVQYYPSAAYNLTATYIKALAGHRAKKLKRAVYLLYQKPPTEEQLRSVGLSLSDYEDEEPETIIGDAEMVKAWNVFTSMLTQWRSSGAGAYGLDYNVLPMLFKIYKIEDEELALQDVRIMEAKALEMIAKQNN;note=Orf no. 40 see PMID: 14972552;ID=CPT-T1_016.CDS.1;Parent=CPT-T1_016.mRNA;
+AY216660.2	GbkToGff	gene	11192	14076	.	+	.	locus_tag=CPT-T1_017;ID=CPT-T1_017.gene;
+AY216660.2	GbkToGff	mRNA	11192	14076	.	+	.	locus_tag=CPT-T1_017;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_017.mRNA;Parent=CPT-T1_017.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	11192	11197	.	+	.	locus_tag=CPT-T1_017;regulatory_class=ribosome_binding_site;ID=CPT-T1_017.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_017.mRNA;
+AY216660.2	GbkToGff	CDS	11203	14076	.	+	0	locus_tag=CPT-T1_017;codon_start=1;transl_table=11;product=tape measure protein;translation=MVDKVAGLSLDVDVSTVQRAVKSLKEFSKANDQAADSMGSLINESEVAKQKAKEHAEQLRRQRKEYEAVEKAIDPTVSKMERLKIASQQLDKLWQQGVVPDETFFRLGEMLDLQNAKLARSRAMLTEEGQAALQEAKAKEQAAVRSKAFMDALNGQVNAIGKTHAELMELKAAELGLSKEAAPLIAKLKDQGRAMNAAGISAGEYRQAMRMLPAQITDVVTSLASGMPVWMVAIQQGGQIKDSFGGIGNTFKVLLSYINPVTAGVGVLVGSLGILAKAGYDSYKSITDIQNALIETGGYAGVTAEELDSVSKKIAQTSNSTIGSIREIVTELASSGKYTREQIQNITKATAEWSASTGKSASQIISEFEKIASDPVKGLKKLNEQYNFLEKGQLTYIDTLSRTKGETEAVSEATKLFADVMEKRMKSIADNATPLEKMWSDIKQWASDAWGWVGDHTLGALNLIIDVVQGTVIQVKMILAKGDEYISNFIASAIKATQSLPGMSDFGADVLKEQENIVKSSRDNYDQLASDLDAINARVEKGEMGYIEAMRQRRTLEKQYSEETKEAIRKEAEEIEKRNRERNKQSKIVRSPTEQFDKELISLRAQLKVLQEHKEIGQKLSAQRKALFTTEATIAVLREASSKRQLSAEEKALLASQERVIELAKQKAEIGDQIVKQQQLNDLTDKSLKFVNEMTAATEQLNASRGLSTRDMERQAELAKITTDYINSGGSEGDEKLQNMIKAQNDYYAAEDAKRADWLAGAESAFADYGDAAMDMYGNVNEIASSALNGMSDMMVQFLTTGKANFEDFAKNIIGMIIKMIAQMVIFNTISGMMGGKTWSFAGGASSGASAASQATPTPAASVFRSVSSGGAAVSLAAAAGSVATSGFNASNSAPKVVNHSGGGTVVDVSGMEVKVDNGSDPRGISQGVEMMFKKMIRESCSQGGEVYNYIQEKTGG;note=Orf no. 38 see PMID: 14972552;ID=CPT-T1_017.CDS.1;Parent=CPT-T1_017.mRNA;
+AY216660.2	GbkToGff	gene	14067	14432	.	+	.	locus_tag=CPT-T1_018;ID=CPT-T1_018.gene;
+AY216660.2	GbkToGff	mRNA	14067	14432	.	+	.	locus_tag=CPT-T1_018;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_018.mRNA;Parent=CPT-T1_018.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	14067	14072	.	+	.	locus_tag=CPT-T1_018;regulatory_class=ribosome_binding_site;ID=CPT-T1_018.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_018.mRNA;
+AY216660.2	GbkToGff	CDS	14079	14432	.	+	0	locus_tag=CPT-T1_018;note=similar to lambda tail tip protein M UniProt ID P03737%3B Orf no. 37 see PMID: 14972552;codon_start=1;transl_table=11;product=tail tip protein;translation=MATLDTFGWCTQVQGGGGSLTTTNSDRSIQFGNGYMQLASSGFNTTRREYSVVYAGEDFMAVYDFCNSHRIKPFAWTPPDGKIGIWVVKPNSLGAKPVSRDVMEINVTFMEQFTSME;ID=CPT-T1_018.CDS.1;Parent=CPT-T1_018.mRNA;
+AY216660.2	GbkToGff	gene	14500	15294	.	+	.	locus_tag=CPT-T1_019;ID=CPT-T1_019.gene;
+AY216660.2	GbkToGff	mRNA	14500	15294	.	+	.	locus_tag=CPT-T1_019;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_019.mRNA;Parent=CPT-T1_019.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	14500	14503	.	+	.	locus_tag=CPT-T1_019;regulatory_class=ribosome_binding_site;ID=CPT-T1_019.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_019.mRNA;
+AY216660.2	GbkToGff	CDS	14512	15294	.	+	0	locus_tag=CPT-T1_019;note=similar to lambda tail tip protein L UniProt ID P03738%3B Orf no. 36 see PMID: 14972552;codon_start=1;transl_table=11;product=tail tip protein;translation=MSENKKLYDEESGKSLFHNCLQSLYPGEIITLIEVDGSKFGAQVYRFHGENIQYTPEEIMQAQQTGTLPPKEITFRGEKYGARPFGISGISFDSSGKATKPQLTVANIDSRVSAMIRAYNGLMQAKVTIWITQRELINSDGSIADGAYRKLVYYIERPNYVDKSVARFDLTSPYDMDGIMIPSRLTQSVCYFAQRGWYKTGKGCGYNGQNGYFDKDNNPVDDPSLDFCPGTVTACRLRFGANNELDFGGCAVASLQRKNQ;ID=CPT-T1_019.CDS.1;Parent=CPT-T1_019.mRNA;
+AY216660.2	GbkToGff	gene	15279	16025	.	+	.	locus_tag=CPT-T1_020;ID=CPT-T1_020.gene;
+AY216660.2	GbkToGff	mRNA	15279	16025	.	+	.	locus_tag=CPT-T1_020;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_020.mRNA;Parent=CPT-T1_020.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	15279	15282	.	+	.	locus_tag=CPT-T1_020;regulatory_class=ribosome_binding_site;ID=CPT-T1_020.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_020.mRNA;
+AY216660.2	GbkToGff	CDS	15291	16025	.	+	0	locus_tag=CPT-T1_020;note=similar to lambda tail tip protein K UniProt ID P03729%3B Orf no. 35 see PMID: 14972552;codon_start=1;transl_table=11;product=tail tip protein;translation=MISAKIKLEIMTHAQEEYPRECCGVVTQKGRVQKYHRIDNVHRDPENHFMMDAVQYACIEDDAESTTIAIVHSHTGDGATTLPSAHDTCMCNEMEVTWIIVSVPEGDMRFVKPEKLPLIGRPWSLGSFDCYGLVMAWHKEHGVELRDRRLNFEWWKPEYGINLYQDYYKQDGFVEIPDQNNPSFGDMVIMQIGQNVPVWNHAGIYLGDNQILHHAFGKLSRRDIYSGWYQDHTVLIVRHKDLKL;ID=CPT-T1_020.CDS.1;Parent=CPT-T1_020.mRNA;
+AY216660.2	GbkToGff	gene	16009	16621	.	+	.	locus_tag=CPT-T1_021;ID=CPT-T1_021.gene;
+AY216660.2	GbkToGff	mRNA	16009	16621	.	+	.	locus_tag=CPT-T1_021;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_021.mRNA;Parent=CPT-T1_021.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	16009	16012	.	+	.	locus_tag=CPT-T1_021;regulatory_class=ribosome_binding_site;ID=CPT-T1_021.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_021.mRNA;
+AY216660.2	GbkToGff	CDS	16022	16621	.	+	0	locus_tag=CPT-T1_021;note=single transmembrane domain predicted N-in and C-out%3B similar to lambda tail tip assembly protein I UniProt ID P03730%3B Orf no. 34 see PMID: 14972552;codon_start=1;transl_table=11;product=tail assembly protein;translation=MNDVKVIKLSGSLGRRFGVFHRYAVDSYPEAIRALSSQVDGFKEYMQSEVGSRSKFAIFVDGVNVGHHEEEKFKCAKEIRIVPIPTGSKTGGLFQVVLGAAIMVAAFYTGGASLALMGTMSSSLFMMGGAMVLGGVMQMISPQPGGANFEVQSSKNKPSYAFGGAVNTTAAGYPLPVPYGYRAGGGATFSAGSYAEDMS;ID=CPT-T1_021.CDS.1;Parent=CPT-T1_021.mRNA;
+AY216660.2	GbkToGff	gene	16688	20217	.	+	.	locus_tag=CPT-T1_022;ID=CPT-T1_022.gene;
+AY216660.2	GbkToGff	mRNA	16688	20217	.	+	.	locus_tag=CPT-T1_022;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_022.mRNA;Parent=CPT-T1_022.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	16688	16691	.	+	.	locus_tag=CPT-T1_022;regulatory_class=ribosome_binding_site;ID=CPT-T1_022.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_022.mRNA;
+AY216660.2	GbkToGff	CDS	16699	20217	.	+	0	locus_tag=CPT-T1_022;note=similar to phage lambda tip attachment protein J UniProt ID P03749%3B Orf no. 33 see PMID: 14972552;codon_start=1;transl_table=11;product=tail fiber;translation=MIQKVISGSKGGSQKPHNPVEMEDNLISINKIKILLAVSDGEIDETFSLKQLMFNSVPVQNEDGSFNFEGVKAEFRPGTQTQEYIKGMEDSSSEVTVNREVTTDNPYTISVTNKTLSAIRIKMFMPRGVRIESNGDKNGVRVEYEVQQAVDGGSFETVLTDVIEGKTMSGYDRSRRVNLPNFNNQVIFRVVRKTPDSNDSNVVDAIQVKSYAEVIDAKFRYPLTGLLFVEFDSKMFPNQLPTISIRKRWKIVNVPSNYDPESRTYNGNWDGTFKKAWTNNPAWVLYDLMINQRYGLDQKELGIAVDKWALYEAAQYCDQMVPDGKGGTEPRYLCDVIIQSQTDAYKVIRDICSIFRGMSFWNGESISVIIDRPREPAYIFTNDNVVNGDFSYTFASEKSMYTTCNVMFDDEQNMYQQDVEPVFDREATLRFGNNVTSITAIGCTRRSEANRRGRWILKTNLRSTTVNFATGLEGMIPTIGDVVAIADNFWSSNLTMNLSGRLLEVSGSQIFLPFRVDARAGDFIIVNKPDGKPVKRTISSVSADGKTIEVNIGFGFPVKPNTVFAIDRTDIALQQYVVTKIDKGDDDEEFTYKITAVEYDPNKYDEIDYGVNIDDRPTSIVEPDQIPRPKNVQVSSESRIVQGMSVETMIVSWDKVPYAVFYDVQWRKDNGNWQNVPQTANKEVYVEGIYAGNYQVRVRSVAGSGTTSGWSNIVAATLTGKQGEPGRPINLTATDDVVFGIRTKWGFSDGSGDTAYTELQQSPDGTVDNASLLSLIPYPQHEYYHSPMPGGNIVWYRVRTVDRIGNVSQWTDFVRGMASTNVDDIIGEISVDIENSPGYEWLVDNATDNAAQNSANAEAAIENALANDKDAIYMKKENGKRKAEYTKSLKLIADETQARVTAIEQLKASFGDQISASNSELREVIATETEALSREIDQLKAQIGDDIQASLTDIREVIATETEALSREIDQLKAQIGDDIQASLTDIREAIANETEARTQADLTLSARLGNNEAALAQKLDSWSNADSTGAMYGVKLGLKYNGQEYSAGMAMSLVGSGAAVKAQILFEASRFAIMTGMNGQTQYPFVVENGQVILSSAIIKNGFITNAMIGNFIQSNNYVFNQSGWRLDKGGTFENYGSDGEGAMKQTNTTISVRDASGRLRVQIGRLTGSW;ID=CPT-T1_022.CDS.1;Parent=CPT-T1_022.mRNA;
+AY216660.2	GbkToGff	gene	20251	20568	.	+	.	locus_tag=CPT-T1_023;ID=CPT-T1_023.gene;
+AY216660.2	GbkToGff	mRNA	20251	20568	.	+	.	locus_tag=CPT-T1_023;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_023.mRNA;Parent=CPT-T1_023.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	20251	20255	.	+	.	locus_tag=CPT-T1_023;regulatory_class=ribosome_binding_site;ID=CPT-T1_023.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_023.mRNA;
+AY216660.2	GbkToGff	CDS	20263	20568	.	+	0	locus_tag=CPT-T1_023;codon_start=1;transl_table=11;product=hypothetical protein;translation=MAYGISTWDANGVYNNYGIKPITVVGWNFLSAGQNSASFSYQVPPGMHVNYVISLDDGAISGPGRKIIASGNTITVTPTNSPGPNVYPSSNCYLIAYLEND;note=Orf no. 32 see PMID: 14972552;ID=CPT-T1_023.CDS.1;Parent=CPT-T1_023.mRNA;
+AY216660.2	GbkToGff	gene	20556	21257	.	+	.	locus_tag=CPT-T1_024;ID=CPT-T1_024.gene;
+AY216660.2	GbkToGff	mRNA	20556	21257	.	+	.	locus_tag=CPT-T1_024;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_024.mRNA;Parent=CPT-T1_024.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	20556	20559	.	+	.	locus_tag=CPT-T1_024;regulatory_class=ribosome_binding_site;ID=CPT-T1_024.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_024.mRNA;
+AY216660.2	GbkToGff	CDS	20568	21257	.	+	0	locus_tag=CPT-T1_024;note=InterPro domain IPR013750%3B GHMP kinase domain- containing protein%3B Orf no. 31 see PMID: 14972552;codon_start=1;transl_table=11;product=hypothetical protein;translation=MSYGAFIDVNGNPFITPLSTPFALYARGEIQSVNVSGSQVAERYVRIPTGVPVIAFCKTTNTQQGTALSAFTFRSGPNVGTVYIRGTNPANQSYTLTYYIFAIFEQSLPRWGMAIWDASGKLVLTNETKVLSDLVTIGTPGYAGGGLNIDTTLSGSYAVVPTILGNYQVVIGRLPTGQPIIGNSTAGSSCRYNGSTTRINAAATTAAGQIMNTTNNGNIITAIKTAAYD;ID=CPT-T1_024.CDS.1;Parent=CPT-T1_024.mRNA;
+AY216660.2	GbkToGff	gene	21279	21518	.	-	.	locus_tag=CPT-T1_025;ID=CPT-T1_025.gene;
+AY216660.2	GbkToGff	mRNA	21279	21518	.	-	.	locus_tag=CPT-T1_025;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_025.mRNA;Parent=CPT-T1_025.gene;
+AY216660.2	GbkToGff	CDS	21279	21506	.	-	0	locus_tag=CPT-T1_025;codon_start=1;transl_table=11;product=hypothetical protein;translation=MKKLITIIAAAFILTGCSSMPERTCTAIYESGGAEYSVYVFGSKMRGKEMVLRAGYPFSFNYVSEKNFKSHDCSI;note=Orf no. 30 see PMID: 14972552;ID=CPT-T1_025.CDS.1;Parent=CPT-T1_025.mRNA;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	21515	21518	.	-	.	locus_tag=CPT-T1_025;regulatory_class=ribosome_binding_site;ID=CPT-T1_025.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_025.mRNA;
+AY216660.2	GbkToGff	gene	21755	21906	.	+	.	locus_tag=CPT-T1_026;ID=CPT-T1_026.gene;
+AY216660.2	GbkToGff	mRNA	21755	21906	.	+	.	locus_tag=CPT-T1_026;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_026.mRNA;Parent=CPT-T1_026.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	21755	21758	.	+	.	locus_tag=CPT-T1_026;regulatory_class=ribosome_binding_site;ID=CPT-T1_026.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_026.mRNA;
+AY216660.2	GbkToGff	CDS	21766	21906	.	+	0	locus_tag=CPT-T1_026;codon_start=1;transl_table=11;product=hypothetical protein;translation=MIRQISIMYVQNLINLDSICRYLCISNKKRLQVLRNRQRIKIYLSH;ID=CPT-T1_026.CDS.1;Parent=CPT-T1_026.mRNA;
+AY216660.2	GbkToGff	gene	22020	23098	.	+	.	locus_tag=CPT-T1_027;ID=CPT-T1_027.gene;
+AY216660.2	GbkToGff	mRNA	22020	23098	.	+	.	locus_tag=CPT-T1_027;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_027.mRNA;Parent=CPT-T1_027.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	22020	22024	.	+	.	locus_tag=CPT-T1_027;regulatory_class=ribosome_binding_site;ID=CPT-T1_027.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_027.mRNA;
+AY216660.2	GbkToGff	CDS	22034	23098	.	+	0	locus_tag=CPT-T1_027;codon_start=1;transl_table=11;product=exodeoxyribonuclease VIII;translation=MFQVFTSSQLSNDEYHRNEGWASEYVSGSSLAEIYQTCPANWRFKKNETTKALEFGTQSHTNFESRDLFTATYARCPAPSEFKDLITSQAALAAKLKSFGLKGTSGKQYPDLIKMMVDCGEELNVQYLIELIAEAEARAEGKQLVDADKYDACMKMRAILEQNPDHEACINSETAQREISIFGEISGVKVKVRLDHLDYKENVPGRVLTGYDENGDPVFEDVIFPEALIITDFKTTMSANPLEFPRLAYNHGYYLKMALQHDLLRRAIQAGAFEGNFPEDIPIVVRLLAQEKKEPYIALAYRMTMEQIRIGRNQYISVVHTYKACSEMDVWPGYAGDASEIELETPSWVRYQNK;note=Orf no. 29 see PMID: 14972552;ID=CPT-T1_027.CDS.1;Parent=CPT-T1_027.mRNA;
+AY216660.2	GbkToGff	gene	23129	23820	.	+	.	locus_tag=CPT-T1_028;ID=CPT-T1_028.gene;
+AY216660.2	GbkToGff	mRNA	23129	23820	.	+	.	locus_tag=CPT-T1_028;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_028.mRNA;Parent=CPT-T1_028.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	23129	23133	.	+	.	locus_tag=CPT-T1_028;regulatory_class=ribosome_binding_site;ID=CPT-T1_028.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_028.mRNA;
+AY216660.2	GbkToGff	CDS	23140	23820	.	+	0	locus_tag=CPT-T1_028;note=InterPro domain IPR007499%3B Orf no. 28 see PMID: 14972552;codon_start=1;transl_table=11;product=recombinase;translation=MHLIHQSGEVKMQLSPETNEILPALFNARNKFAKAKKDAKNNHLKNSYATLDAMMAAVSPALTDNDIMILQSMLDTSTETTFHLETMLIHKSGQWAKFFMMMPIAKRDPQGVGSAMTYARRYSLAAALGISQSDDDAQLAVKSVKDWKKELDACEDIESLKDVWANAYRQTDTASKSIIQDHYNALKAKFEIGKARGIRPAQPEQKKQVEATSAKPVQSQSITNFE;ID=CPT-T1_028.CDS.1;Parent=CPT-T1_028.mRNA;
+AY216660.2	GbkToGff	gene	23854	24289	.	+	.	locus_tag=CPT-T1_029;ID=CPT-T1_029.gene;
+AY216660.2	GbkToGff	mRNA	23854	24289	.	+	.	locus_tag=CPT-T1_029;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_029.mRNA;Parent=CPT-T1_029.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	23854	23858	.	+	.	locus_tag=CPT-T1_029;regulatory_class=ribosome_binding_site;ID=CPT-T1_029.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_029.mRNA;
+AY216660.2	GbkToGff	CDS	23867	24289	.	+	0	locus_tag=CPT-T1_029;note=InterPro domain IPR012340,Orf no. 27 see PMID: 14972552;codon_start=1;transl_table=11;product=single-stranded DNA-binding protein;translation=MHIITGEIRKEPKILERNGGNTYIIELAESYKPRDGDREYTNYTFFFSDGGKPGLADWYREAFQVGRVISVSCETLKISSREHNGMIYNSLQAADFPKLVFSQRGQSNQQQRAPQQQQRSQQQSQPQPNQQSTFDDDIPF;ID=CPT-T1_029.CDS.1;Parent=CPT-T1_029.mRNA;
+AY216660.2	GbkToGff	gene	24351	26550	.	-	.	locus_tag=CPT-T1_030;ID=CPT-T1_030.gene;
+AY216660.2	GbkToGff	mRNA	24351	26550	.	-	.	locus_tag=CPT-T1_030;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_030.mRNA;Parent=CPT-T1_030.gene;
+AY216660.2	GbkToGff	CDS	24351	26537	.	-	0	locus_tag=CPT-T1_030;codon_start=1;transl_table=11;product=hypothetical protein;translation=MALYRRGTASMDADGTVHGTDTKWKDQLALIRVGATIVFLEQPIKLAVISDIVSDTELKAISTDGQTAADGKYVILLNDSLTVNGLAQNVAETLRYYQSKETEIASALDIIADLDMDNLNNIVQEIKSNKSAAEAAQNQAELARDSANSARDESISIKNQTQQISDSAIGSINAAKDKAITNVQQKENSAVTHINSEEAAAIQAINDAKGDLSGYVNDAQTAAQTATSAKNDAQAARDAAVSAKDAAAVSAQEAQDAANSVNADNLLTKDGNLSGLADKEQSKKNLAVNRLNQPRGDLTEIYSNDDRTGFKLIVKDSGDWGAMTHDGSENKALGVNFGGTGGTTEEQARTSLKVYKLDRTNLGEKHLDSITGEGDGPGIYMQSSSALATASRGYPEATAGMLEVLPNGANGASACIQRFTPFTYLGTAPESGNSQNEYARAGRGTFYIRMKNGNNAKFSPWIPFQASSSGNVVSSPASNEKSSWVDYVNALSSQPSSLASYNVNSVGWVTAISVRHRNGQGDGSAFGFVIEDASMTSPHYKDVRLRKQTGAGQWQSTQVIWNTGNTTVDSNGFIKRASPIVDIFGNGSHRTNDESEGCTVERISTGEYLIRGCLSLNSDLAWGGVNGGIEIPKDINGQPILWVDYDVNPDGSLVIKTYHRTHDNAPSFARNHKDGYSDGDPIDIPSDVFVSVRVEMPNDSIYNKKVEECKRNHERMVSGEFVESLKNT;note=Orf no. 26 see PMID: 14972552;ID=CPT-T1_030.CDS.1;Parent=CPT-T1_030.mRNA;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	26546	26550	.	-	.	locus_tag=CPT-T1_030;regulatory_class=ribosome_binding_site;ID=CPT-T1_030.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_030.mRNA;
+AY216660.2	GbkToGff	gene	26447	26600	.	-	.	locus_tag=CPT-T1_031;ID=CPT-T1_031.gene;
+AY216660.2	GbkToGff	mRNA	26447	26600	.	-	.	locus_tag=CPT-T1_031;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_031.mRNA;Parent=CPT-T1_031.gene;
+AY216660.2	GbkToGff	CDS	26447	26590	.	-	0	locus_tag=CPT-T1_031;codon_start=1;transl_table=11;product=hypothetical protein;translation=MIKSLLSNSCYLLKEFILWLYIDAVLHQWMQTVRFTEPIQNGKISLL;note=Orf no. 25 see PMID: 14972552;ID=CPT-T1_031.CDS.1;Parent=CPT-T1_031.mRNA;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	26597	26600	.	-	.	locus_tag=CPT-T1_031;regulatory_class=ribosome_binding_site;ID=CPT-T1_031.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_031.mRNA;
+AY216660.2	GbkToGff	gene	26638	27585	.	-	.	locus_tag=CPT-T1_032;ID=CPT-T1_032.gene;
+AY216660.2	GbkToGff	mRNA	26638	27585	.	-	.	locus_tag=CPT-T1_032;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_032.mRNA;Parent=CPT-T1_032.gene;
+AY216660.2	GbkToGff	CDS	26638	27573	.	-	0	locus_tag=CPT-T1_032;codon_start=1;transl_table=11;product=DNA primase;translation=MNEEFMMFQKEDVLPYMKGLWREAFQSICGLPNNVFNKKHQPCPNCGGKDRFRWTDNLNTPGDGGAICNSCGNDSGIGWLMKLTGMPYSECVNILGRFLGKVPQEYIVKANKKARRTPVSGVNVMMAEHEAVMKVMERTEKRVNTPLSVFESLPTESFDVGIKRSEDGRESVFHTIPCQLVHEDGLDDEFCNILIIDEEGRESFYAKKYTSCSVAVTGKTEKAIYLCLNWIDAQHIAFHTKQEVWACFTPENLEMVAYRYKGDREVRVACEPSDKDTLYMADDRQLKIIIPNPGGYRSGMQAKLFSASDLL;note=alternative start codon to Orf no. 24 see PMID: 14972552;ID=CPT-T1_032.CDS.1;Parent=CPT-T1_032.mRNA;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	27582	27585	.	-	.	locus_tag=CPT-T1_032;regulatory_class=ribosome_binding_site;ID=CPT-T1_032.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_032.mRNA;
+AY216660.2	GbkToGff	gene	27635	28098	.	-	.	locus_tag=CPT-T1_033;ID=CPT-T1_033.gene;
+AY216660.2	GbkToGff	mRNA	27635	28098	.	-	.	locus_tag=CPT-T1_033;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_033.mRNA;Parent=CPT-T1_033.gene;
+AY216660.2	GbkToGff	CDS	27635	28087	.	-	0	locus_tag=CPT-T1_033;note=HHPred predicted structural similarity at 97%25 probability to phage P22 repression protein C2 Protein Data Bank entry 2R1J over most of protein and phage lambda repressor entry 3BDN%3BOrf no. 23 see PMID:  14972552;codon_start=1;transl_table=11;product=transcriptional regulator;translation=MSIQRIAESTGEIDKRHINGNNGTRRGKDKKPRQRCGFYIHKEETRAGLRARLDALIEYYGGPAACAKALKVSNQTVQGWKERNMISWQGAEAAHRAYRRQGCKGFRAAWLRFDLKFDGNGKCLEKRCKNKKFMRVVKREDIGTTNSIFS;ID=CPT-T1_033.CDS.1;Parent=CPT-T1_033.mRNA;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	28095	28098	.	-	.	locus_tag=CPT-T1_033;regulatory_class=ribosome_binding_site;ID=CPT-T1_033.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_033.mRNA;
+AY216660.2	GbkToGff	gene	28168	30199	.	+	.	locus_tag=CPT-T1_034;ID=CPT-T1_034.gene;
+AY216660.2	GbkToGff	mRNA	28168	30199	.	+	.	locus_tag=CPT-T1_034;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_034.mRNA;Parent=CPT-T1_034.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	28168	28171	.	+	.	locus_tag=CPT-T1_034;regulatory_class=ribosome_binding_site;ID=CPT-T1_034.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_034.mRNA;
+AY216660.2	GbkToGff	CDS	28181	30199	.	+	0	locus_tag=CPT-T1_034;codon_start=1;transl_table=11;product=DNA helicase;translation=MTMNIKKQIALLGDDYIKRTQERFTVGEVVPYPYQVVAYAEIAKRLSNYEHPFFVKASVSAGKTIIFAMVAKQCQKMGLKMLVLARQGEIVDQDSEEIDNFGVTNSIFSASLGIKSCYFPIVVGSEGTVANGLDNELADFVPHVIGIDECHQVDWEDLAQAIEGKETMEQMRGEKGKIIMDGDIPLIGNDGKPLLGTKRSQYTIVIMEMMRRCKKVHGHDLRIFGMTGSEFRGVVPILVENPKALGFWRERVTDIDTNYLIEFGSVVPTIFGSTDGVHYDLDKFKASSEDGVQDFTEKDMKAMEDEILHDKSLTQRIMQMVAKKAEERNAVLITCAGVRHCKEAAAALPPGSTYAIITGDTDNKARKKILDDVRAGKIKYTFQVMALTTGVNVPNWDFSVILRKIGSLTLLIQLLGRGMRLLKSWQVAEGMVKQDHLVWDFAGTMDELGQLYFDPILEQAQFQKRFENGKDPKTCPKCGCVNSFYARRCVNVIDGERCDHFWTSQICEDQVDERTGKILVKGCGAENDVVARVCRCCDASLVDPNLKLSGKAYTKNDWYEVKNFEVTLTKNQKGIIYKYTLINDDGDEFKAYEKFFPESDSKICGTLWKTKGVLPHVSDPKMRRYFIGMKNAIKILQYSHHIAHPVRVTHRRNQKKEDIISRKDFGMEDIPE;note=Orf no. 22 see PMID: 14972552;ID=CPT-T1_034.CDS.1;Parent=CPT-T1_034.mRNA;
+AY216660.2	GbkToGff	gene	30181	30612	.	+	.	locus_tag=CPT-T1_035;ID=CPT-T1_035.gene;
+AY216660.2	GbkToGff	mRNA	30181	30612	.	+	.	locus_tag=CPT-T1_035;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_035.mRNA;Parent=CPT-T1_035.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	30181	30185	.	+	.	locus_tag=CPT-T1_035;regulatory_class=ribosome_binding_site;ID=CPT-T1_035.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_035.mRNA;
+AY216660.2	GbkToGff	CDS	30196	30612	.	+	0	locus_tag=CPT-T1_035;note=HHPred predicted structural similarity at 97%25 probability to B. subtilis recombination protein U/resolvase Protein Data Bank entry 1ZP7 over most of protein%3B Orf no. 21 see PMID: 14972552;codon_start=1;transl_table=11;product=Holliday junction resolvase;translation=MITDKGDYLEFYERDTSDTRKEDAHQVDCVSWLKYNFPHLLFWHTVNEGEKTITSALRDEQAGLLKGVSDFVILIGVNSRYPFAAIELKRVNKSGKGKASPVSDKQREFLQKVRERGGFSAVAYGFGQFKIAIYEMMK;ID=CPT-T1_035.CDS.1;Parent=CPT-T1_035.mRNA;
+AY216660.2	GbkToGff	gene	30669	31393	.	+	.	locus_tag=CPT-T1_036;ID=CPT-T1_036.gene;
+AY216660.2	GbkToGff	mRNA	30669	31393	.	+	.	locus_tag=CPT-T1_036;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_036.mRNA;Parent=CPT-T1_036.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	30669	30672	.	+	.	locus_tag=CPT-T1_036;regulatory_class=ribosome_binding_site;ID=CPT-T1_036.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_036.mRNA;
+AY216660.2	GbkToGff	CDS	30680	31393	.	+	0	locus_tag=CPT-T1_036;codon_start=1;transl_table=11;product=DNA adenine methyltransferase;translation=MKDFNDIETIDFAETGCSFTREAIASGGYYQALKTPTCKEISGRRYKGTNTPDAVRDLWSTPREVIAYLEGRYGKYDLDAAASEENKVCEKFYSQETNCLKRWWGKNKHVWLNPPYSRPDIFVKKAIEQMEHNNQIDMLLPADNSTAWFTEARQNAAEIIWIEADLTEDIDGNEYARSGRLAFISGETGKAVDGNNKGSVIFIMRELKEGEVQQTHYIPITSICPSVKNKRAKVRKV;note=Orf no. 20 see PMID: 14972552;ID=CPT-T1_036.CDS.1;Parent=CPT-T1_036.mRNA;
+AY216660.2	GbkToGff	gene	31377	31641	.	+	.	locus_tag=CPT-T1_037;ID=CPT-T1_037.gene;
+AY216660.2	GbkToGff	mRNA	31377	31641	.	+	.	locus_tag=CPT-T1_037;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_037.mRNA;Parent=CPT-T1_037.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	31377	31380	.	+	.	locus_tag=CPT-T1_037;regulatory_class=ribosome_binding_site;ID=CPT-T1_037.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_037.mRNA;
+AY216660.2	GbkToGff	CDS	31393	31641	.	+	0	locus_tag=CPT-T1_037;codon_start=1;transl_table=11;product=hypothetical protein;translation=MSEKMVPVKLTEQGLWLLYRATCCEIMERNGLTQDVIGCDLWEFTSSLDMSFDEIKNEYIENWPSIIQKDVEELKADTIVQH;note=alternative start codon to Orf no. 19 see PMID: 14972552;ID=CPT-T1_037.CDS.1;Parent=CPT-T1_037.mRNA;
+AY216660.2	GbkToGff	gene	31695	31917	.	+	.	locus_tag=CPT-T1_038;ID=CPT-T1_038.gene;
+AY216660.2	GbkToGff	mRNA	31695	31917	.	+	.	locus_tag=CPT-T1_038;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_038.mRNA;Parent=CPT-T1_038.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	31695	31698	.	+	.	locus_tag=CPT-T1_038;regulatory_class=ribosome_binding_site;ID=CPT-T1_038.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_038.mRNA;
+AY216660.2	GbkToGff	CDS	31708	31917	.	+	0	locus_tag=CPT-T1_038;codon_start=1;transl_table=11;product=hypothetical protein;translation=MARINANFFNIAQQSAKMAVHITNKQGGNFDWDIAMNFLKMSYYRCSVEEVEGFISDVEKLTNADKKAR;note=Orf no. 18 see PMID: 14972552;ID=CPT-T1_038.CDS.1;Parent=CPT-T1_038.mRNA;
+AY216660.2	GbkToGff	gene	31881	32167	.	+	.	locus_tag=CPT-T1_039;ID=CPT-T1_039.gene;
+AY216660.2	GbkToGff	mRNA	31881	32167	.	+	.	locus_tag=CPT-T1_039;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_039.mRNA;Parent=CPT-T1_039.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	31881	31884	.	+	.	locus_tag=CPT-T1_039;regulatory_class=ribosome_binding_site;ID=CPT-T1_039.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_039.mRNA;
+AY216660.2	GbkToGff	CDS	31895	32167	.	+	0	locus_tag=CPT-T1_039;codon_start=1;transl_table=11;product=hypothetical protein;translation=MLIKKQGKREVWEHAKECGISDDIALIAKYFDIKDVSIISNGKISFMEGMPRKMQRVPATPSLEFYREEGKRIERERKSTKNGKSSRLKY;ID=CPT-T1_039.CDS.1;Parent=CPT-T1_039.mRNA;
+AY216660.2	GbkToGff	gene	32099	32399	.	+	.	locus_tag=CPT-T1_040;ID=CPT-T1_040.gene;
+AY216660.2	GbkToGff	mRNA	32099	32399	.	+	.	locus_tag=CPT-T1_040;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_040.mRNA;Parent=CPT-T1_040.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	32099	32102	.	+	.	locus_tag=CPT-T1_040;regulatory_class=ribosome_binding_site;ID=CPT-T1_040.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_040.mRNA;
+AY216660.2	GbkToGff	CDS	32112	32399	.	+	0	locus_tag=CPT-T1_040;codon_start=1;transl_table=11;product=hypothetical protein;translation=MSEKENPQKTASLPGLNINADEYQAIWIGKKQVKQIPFSDWLPPDFVNVLCTIGIEQELHIGYYSPGRNSMMLEVDGKLVEFKSSDLGFWLKAVA;note=Orf no. 17 see PMID: 14972552;ID=CPT-T1_040.CDS.1;Parent=CPT-T1_040.mRNA;
+AY216660.2	GbkToGff	gene	32467	33611	.	+	.	locus_tag=CPT-T1_041;ID=CPT-T1_041.gene;
+AY216660.2	GbkToGff	mRNA	32467	33611	.	+	.	locus_tag=CPT-T1_041;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_041.mRNA;Parent=CPT-T1_041.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	32467	32470	.	+	.	locus_tag=CPT-T1_041;regulatory_class=ribosome_binding_site;ID=CPT-T1_041.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_041.mRNA;
+AY216660.2	GbkToGff	CDS	32478	33611	.	+	0	locus_tag=CPT-T1_041;note=HHPred predicted structural similarity at 99%25 probability to E. coli exonuclease SbcD Protein Data Bank entry 4LTY over two thirds of protein%3B Orf no. 16 see PMID: 14972552;codon_start=1;transl_table=11;product=exonuclease;translation=MSQAKITTEQLIEERMSGLTLREIAEKYGMHIRTVEARHAKLAKEGHFHGNEHVAKMVPEGFMVKGTSTMIDAEGNEKIRWVKTSVDNERLEVLMEKAREAFCSELPKAIPSESPDVSFDEDTLAMYPVFDLHIGALAHKHECGENYDTATAEKVMNGFFDYAVDKAPNSKNAVLVLGGDFLHYDSLESKTPASGHYLDSDSRYAKLVYVAIRSVRRAVSRMLEKHQVIDIKAISGNHDESGMVWLRAALAAFYEDEPRVNVDVSPAAMMMTSFGKTLIGYTHGHQMRKADTRLSVMATDFRKLFGQSDYVYTHSGHWHSQKITETNLGIDEVHGQLGSPDAYSANGGWRSQRQAAVIVYHKEFGEVGRFICRPEMF;ID=CPT-T1_041.CDS.1;Parent=CPT-T1_041.mRNA;
+AY216660.2	GbkToGff	gene	33673	34166	.	+	.	locus_tag=CPT-T1_042;ID=CPT-T1_042.gene;
+AY216660.2	GbkToGff	mRNA	33673	34166	.	+	.	locus_tag=CPT-T1_042;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_042.mRNA;Parent=CPT-T1_042.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	33673	33677	.	+	.	locus_tag=CPT-T1_042;regulatory_class=ribosome_binding_site;ID=CPT-T1_042.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_042.mRNA;
+AY216660.2	GbkToGff	CDS	33684	34166	.	+	0	locus_tag=CPT-T1_042;note=similar to phage T7 protein 3.8%3B InterPro domain IPR003615%3B Orf no. 15 see PMID: 14972552;codon_start=1;transl_table=11;product=HNH endonuclease;translation=MNWHEHYEYRDGVLYHKVKPCRRHDVNIGDVAGRVAKNGYHYVVHKNRPYKRSRVIWEMFNGEIPDGFVIDHLNHNATDDRIDNLECKPRRENMVNVKLRIDSTTGVTGVSRKRDNKWRAYITIMGKQKCKSFDTFEEACAQRIEWSVTHDFHPNHGGTY;ID=CPT-T1_042.CDS.1;Parent=CPT-T1_042.mRNA;
+AY216660.2	GbkToGff	gene	34226	34415	.	+	.	locus_tag=CPT-T1_043;ID=CPT-T1_043.gene;
+AY216660.2	GbkToGff	mRNA	34226	34415	.	+	.	locus_tag=CPT-T1_043;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_043.mRNA;Parent=CPT-T1_043.gene;
+AY216660.2	GbkToGff	CDS	34239	34415	.	+	0	locus_tag=CPT-T1_043;codon_start=1;transl_table=11;product=hypothetical protein;translation=MKIVKCIRNDSKTLPFRVNQIYSVGYDFGGGLFEIYDGRGSAIQTPLNGHYLEFIEID;note=Orf no. 14 see PMID: 14972552;ID=CPT-T1_043.CDS.1;Parent=CPT-T1_043.mRNA;
+AY216660.2	GbkToGff	gene	34525	34749	.	+	.	locus_tag=CPT-T1_044;ID=CPT-T1_044.gene;
+AY216660.2	GbkToGff	mRNA	34525	34749	.	+	.	locus_tag=CPT-T1_044;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_044.mRNA;Parent=CPT-T1_044.gene;
+AY216660.2	GbkToGff	CDS	34534	34749	.	+	0	locus_tag=CPT-T1_044;note=1 transmembrane domain%2C predicted N-out and C-in%3B Orf no. 13 see PMID: 14972552;codon_start=1;transl_table=11;product=pinholin class 2;translation=MKEFLTAATSSTGGASLVGAATGQLYIAGATFICFLLFGAWGAYWKYRDSKAIQEALNDGDLNKALKIRGR;ID=CPT-T1_044.CDS.1;Parent=CPT-T1_044.mRNA;
+AY216660.2	GbkToGff	gene	34739	35237	.	+	.	locus_tag=CPT-T1_045;ID=CPT-T1_045.gene;
+AY216660.2	GbkToGff	mRNA	34739	35237	.	+	.	locus_tag=CPT-T1_045;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_045.mRNA;Parent=CPT-T1_045.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	34739	34743	.	+	.	locus_tag=CPT-T1_045;regulatory_class=ribosome_binding_site;ID=CPT-T1_045.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_045.mRNA;
+AY216660.2	GbkToGff	CDS	34749	35237	.	+	0	locus_tag=CPT-T1_045;codon_start=1;transl_table=11;product=SAR endolysin;translation=MSLKNNVIGASIGAALTLTPTLLERIEGIEYEVYYDIAGVPTVCSGITGPDVIPGKKYTKRECDALLIKHIGVAQRYVDKKVKVDIPVTMRASLYSFTFNVGTGAFGSSTMLKLINQRKHKEACNQLWRWVYYYNPKTKKREVSRGIKNRRAEEYAYCVKEL;note=Orf no. 12 see PMID: 14972552;ID=CPT-T1_045.CDS.1;Parent=CPT-T1_045.mRNA;
+AY216660.2	GbkToGff	gene	35227	35638	.	+	.	locus_tag=CPT-T1_046;ID=CPT-T1_046.gene;
+AY216660.2	GbkToGff	mRNA	35227	35638	.	+	.	locus_tag=CPT-T1_046;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_046.mRNA;Parent=CPT-T1_046.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	35227	35230	.	+	.	locus_tag=CPT-T1_046;regulatory_class=ribosome_binding_site;ID=CPT-T1_046.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_046.mRNA;
+AY216660.2	GbkToGff	CDS	35237	35638	.	+	0	locus_tag=CPT-T1_046;note=PMID 30135120 shows molecular function of unimolecular spanin%3B Orf no. 11 see PMID: 14972552;codon_start=1;transl_table=11;product=u-spanin;translation=MKLKKTCIAITVAVGVISLSGCSTASALSGLLSDSPDVTAQVGAENTKQLAGVTAKADDKREVKVSDSNIGKIDSSVKKSVEVSTIQANTVNAESITVTKSGSWYDPVVCWILVFIVLLLFYFLIRKHEKKEA;ID=CPT-T1_046.CDS.1;Parent=CPT-T1_046.mRNA;
+AY216660.2	GbkToGff	gene	35761	36179	.	-	.	locus_tag=CPT-T1_047;ID=CPT-T1_047.gene;
+AY216660.2	GbkToGff	mRNA	35761	36179	.	-	.	locus_tag=CPT-T1_047;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_047.mRNA;Parent=CPT-T1_047.gene;
+AY216660.2	GbkToGff	CDS	35761	36168	.	-	0	locus_tag=CPT-T1_047;note=HHPred predicted structural similarity at 88%25 probability to S. epidermidis RipR transcriptional regulator Protein Data Bank entry 3IWF over most of protein%3B Orf no. 10 see PMID: 14972552;codon_start=1;transl_table=11;product=HTH domain-containing protein;translation=MLLLLDLFRFCEGYDKYTRQHIAKFIYAHKESERFAKAAGMTRREFTSALSKEFCARCVTEGYLDCKGGFYWCKGKIKRPVMMKLMCIDGYNNRYTWEMMHIGEMSDEDLFGERRNIDRSERRIVRKAPAYERRI;ID=CPT-T1_047.CDS.1;Parent=CPT-T1_047.mRNA;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	36176	36179	.	-	.	locus_tag=CPT-T1_047;regulatory_class=ribosome_binding_site;ID=CPT-T1_047.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_047.mRNA;
+AY216660.2	GbkToGff	gene	36173	37755	.	-	.	locus_tag=CPT-T1_048;ID=CPT-T1_048.gene;
+AY216660.2	GbkToGff	mRNA	36173	37755	.	-	.	locus_tag=CPT-T1_048;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_048.mRNA;Parent=CPT-T1_048.gene;
+AY216660.2	GbkToGff	CDS	36173	37741	.	-	0	locus_tag=CPT-T1_048;note=HHPred predicted structural similarity at 95%25 probability to E. coli AAA ATPase ravA Protein Data Bank entry 3NBX over AAA domain of protein%3B Orf no. 9 see PMID: 14972552;codon_start=1;transl_table=11;product=putative ATPase;translation=MFNIKPKLNYQQIIEIANSTGVNPVAIAIRENSYGDSVSFWQDPIDINSGNDKFPLISLGGDNLVFEYAKAKAESVQFPVSSAYAHFIGCISAAMLGKFWVQYHGEEQPTALYMVISQPPSTGKSAINSAAITPMRAEIQRLNEERKKERIRLTSQLRQIEKEIKNDPKGNTTAALYEDKEKLEEKIKKMADIVFAVSDPTPEGLAKVAAVQGHFSVISDEATAINTLLGLTYGGSDKKSNSELILKAWDKNHMEVARSNQDNNLSLCPVGSICVIAQDETIKGIMDAGQRGIGVSERFLLVREEPLLGTRILCDENGDALYKEVDRGLVSKYYRLVHNIMKEDNVVLSVSRNAMRELNLARQAMEPDFAAGGKYSHSMLRGHLGKFDKHALRIASVLHTIKNWEGESPNRSNREIDLETMQEAIMIFNELSRTYLSSASAAGYAGDEAESRKLIDVITEIAKKNKGRAPIHSIVAKCRNVTPFNGQQKVAERIDSLLITLEEMNYTCRIDDIVFINPRLMG;ID=CPT-T1_048.CDS.1;Parent=CPT-T1_048.mRNA;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	37751	37755	.	-	.	locus_tag=CPT-T1_048;regulatory_class=ribosome_binding_site;ID=CPT-T1_048.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_048.mRNA;
+AY216660.2	GbkToGff	gene	37741	38294	.	-	.	locus_tag=CPT-T1_049;ID=CPT-T1_049.gene;
+AY216660.2	GbkToGff	mRNA	37741	38294	.	-	.	locus_tag=CPT-T1_049;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_049.mRNA;Parent=CPT-T1_049.gene;
+AY216660.2	GbkToGff	CDS	37741	38280	.	-	0	locus_tag=CPT-T1_049;note=InterPro domain IPR003615%3B similar to phage T7 protein 3.8%3B Orf no. 8 see PMID: 14972552;codon_start=1;transl_table=11;product=HNH endonuclease;translation=MKDELKYVSGRLYWKEWRIGRRRNLLAGTVNKKGYRSICFPGGVFEYAHRIVWKIHYGNIPEGMDVDHINHERDDNRIENLRLVTRQDNLRNKGVVSSNTGVMGVYWNKKTNRYTANITINKKTKHLGTFMTLDAAAKARKEAERLYGFHENHGSNSTFCKTRVPLTVYHSRRQLRSLL;ID=CPT-T1_049.CDS.1;Parent=CPT-T1_049.mRNA;
+AY216660.2	GbkToGff	gene	38277	38706	.	-	.	locus_tag=CPT-T1_050;ID=CPT-T1_050.gene;
+AY216660.2	GbkToGff	mRNA	38277	38706	.	-	.	locus_tag=CPT-T1_050;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_050.mRNA;Parent=CPT-T1_050.gene;
+AY216660.2	GbkToGff	CDS	38277	38693	.	-	0	locus_tag=CPT-T1_050;note=HHPred predicted structural similarity at 88%25 probability to S. epidermidis RipR transcriptional regulator Protein Data Bank entry 3IWF over most of protein%3B Orf no. 7 see PMID: 14972552;codon_start=1;transl_table=11;product=hypothetical protein;translation=MKSIKLKCTSADKITGFEVNNLYKGRERYDDTREVKLKCGKYLKLEKHDELHIHGSDEIFFAKFTELKTKTLKCTGLDHRNPMKKSFKVGKRYQVESGRALGGVAGYIFDEDGCRWTLFREEVGFSIADGTTFESKYL;ID=CPT-T1_050.CDS.1;Parent=CPT-T1_050.mRNA;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	38702	38706	.	-	.	locus_tag=CPT-T1_050;regulatory_class=ribosome_binding_site;ID=CPT-T1_050.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_050.mRNA;
+AY216660.2	GbkToGff	gene	38774	38994	.	-	.	locus_tag=CPT-T1_051;ID=CPT-T1_051.gene;
+AY216660.2	GbkToGff	mRNA	38774	38994	.	-	.	locus_tag=CPT-T1_051;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_051.mRNA;Parent=CPT-T1_051.gene;
+AY216660.2	GbkToGff	CDS	38774	38983	.	-	0	locus_tag=CPT-T1_051;codon_start=1;transl_table=11;product=hypothetical protein;translation=MEQDNFWTRYFAALDAGLSSEWCIKVAYKEITLDEALGDMDMDAESEYDPNFELPGDDINEDVDDYIPW;note=Orf no. 6 see PMID: 14972552;ID=CPT-T1_051.CDS.1;Parent=CPT-T1_051.mRNA;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	38991	38994	.	-	.	locus_tag=CPT-T1_051;regulatory_class=ribosome_binding_site;ID=CPT-T1_051.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_051.mRNA;
+AY216660.2	GbkToGff	gene	38987	39222	.	-	.	locus_tag=CPT-T1_052;ID=CPT-T1_052.gene;
+AY216660.2	GbkToGff	mRNA	38987	39222	.	-	.	locus_tag=CPT-T1_052;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_052.mRNA;Parent=CPT-T1_052.gene;
+AY216660.2	GbkToGff	CDS	38987	39211	.	-	0	locus_tag=CPT-T1_052;codon_start=1;transl_table=11;product=hypothetical protein;translation=MSIKVENIIKHLNAKGRVFIKMDKSSGFISMTVTKTRNGNSVIGSVPGSRLINATDADVRATLEANSIYINSWG;note=Orf no. 5 see PMID: 14972552;ID=CPT-T1_052.CDS.1;Parent=CPT-T1_052.mRNA;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	39219	39222	.	-	.	locus_tag=CPT-T1_052;regulatory_class=ribosome_binding_site;ID=CPT-T1_052.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_052.mRNA;
+AY216660.2	GbkToGff	gene	39289	39444	.	-	.	locus_tag=CPT-T1_053;ID=CPT-T1_053.gene;
+AY216660.2	GbkToGff	mRNA	39289	39444	.	-	.	locus_tag=CPT-T1_053;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_053.mRNA;Parent=CPT-T1_053.gene;
+AY216660.2	GbkToGff	CDS	39289	39432	.	-	0	locus_tag=CPT-T1_053;codon_start=1;transl_table=11;product=hypothetical protein;translation=MIYVHTFYTGKFNSVKNVRVYDSRQKAMMQKVVLGGTIKECKVISEC;note=Orf no. 4 see PMID: 14972552;ID=CPT-T1_053.CDS.1;Parent=CPT-T1_053.mRNA;
+AY216660.2	GbkToGff	gene	39429	39758	.	-	.	locus_tag=CPT-T1_054;ID=CPT-T1_054.gene;
+AY216660.2	GbkToGff	mRNA	39429	39758	.	-	.	locus_tag=CPT-T1_054;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_054.mRNA;Parent=CPT-T1_054.gene;
+AY216660.2	GbkToGff	CDS	39429	39749	.	-	0	locus_tag=CPT-T1_054;codon_start=1;transl_table=11;product=hypothetical protein;translation=MNHTYKITTKSPKINGSTVAALNNAAAIHEKNIMERVKAAVGRFYGINADIADSKRLFKYAPGHPYSRMVDIKHNKELVRIGSLSVDEFDHSINLVTAYQTWDGKK;note=Orf no. 3 see PMID: 14972552;ID=CPT-T1_054.CDS.1;Parent=CPT-T1_054.mRNA;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	39755	39758	.	-	.	locus_tag=CPT-T1_054;regulatory_class=ribosome_binding_site;ID=CPT-T1_054.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_054.mRNA;
+AY216660.2	GbkToGff	gene	39766	39978	.	-	.	locus_tag=CPT-T1_055;ID=CPT-T1_055.gene;
+AY216660.2	GbkToGff	mRNA	39766	39978	.	-	.	locus_tag=CPT-T1_055;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_055.mRNA;Parent=CPT-T1_055.gene;
+AY216660.2	GbkToGff	CDS	39766	39966	.	-	0	locus_tag=CPT-T1_055;codon_start=1;transl_table=11;product=hypothetical protein;translation=MSDNIYRVVAISRKTQKRVIAYMGSSAIEATDAFELLKNNEGFMNTFRVRLERLEPVIIDEARKLS;note=Orf no. 2 see PMID: 14972552;ID=CPT-T1_055.CDS.1;Parent=CPT-T1_055.mRNA;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	39973	39978	.	-	.	locus_tag=CPT-T1_055;regulatory_class=ribosome_binding_site;ID=CPT-T1_055.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_055.mRNA;
+AY216660.2	GbkToGff	gene	39959	40342	.	-	.	locus_tag=CPT-T1_056;ID=CPT-T1_056.gene;
+AY216660.2	GbkToGff	mRNA	39959	40342	.	-	.	locus_tag=CPT-T1_056;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_056.mRNA;Parent=CPT-T1_056.gene;
+AY216660.2	GbkToGff	CDS	39959	40330	.	-	0	locus_tag=CPT-T1_056;codon_start=1;transl_table=11;product=hypothetical protein;translation=MFKQFTDLDFSASSVIQTDEKVHVAIENIARKIHNKQEKAMIAALTAYYDVSDVMECVDRVTRVVDRLGASRLIDNDTGEVITQFNKPFMRTEPGSVAPCFVADYSITVNSFVADRVKEALYE;note=Orf no. 1 see PMID: 14972552;ID=CPT-T1_056.CDS.1;Parent=CPT-T1_056.mRNA;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	40339	40342	.	-	.	locus_tag=CPT-T1_056;regulatory_class=ribosome_binding_site;ID=CPT-T1_056.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_056.mRNA;
+AY216660.2	GbkToGff	gene	41020	41606	.	+	.	locus_tag=CPT-T1_057;ID=CPT-T1_057.gene;
+AY216660.2	GbkToGff	mRNA	41020	41606	.	+	.	locus_tag=CPT-T1_057;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_057.mRNA;Parent=CPT-T1_057.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	41020	41024	.	+	.	locus_tag=CPT-T1_057;regulatory_class=ribosome_binding_site;ID=CPT-T1_057.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_057.mRNA;
+AY216660.2	GbkToGff	CDS	41031	41606	.	+	0	locus_tag=CPT-T1_057;codon_start=1;transl_table=11;product=hypothetical protein;translation=MSSYQSDAVQAAIKAAYEKAGVTVEQRPEAKVTDVIRAACDQLYGDGENTEFTFDANKMAEAAARKSMPDADEHDVAKGAESWLLGKTDEINEKFKSSFITPIVSRHFSKIGKSVKVSVTMNDEKLRVVTISVSDEEVPVKKRRSRKKVSLADCLDSFVPDVDDLEKGDVTVSTVRDLVRQMKAHIEKCGL;note=Orf no. 77 see PMID: 14972552;ID=CPT-T1_057.CDS.1;Parent=CPT-T1_057.mRNA;
+AY216660.2	GbkToGff	gene	41606	41945	.	+	.	locus_tag=CPT-T1_058;ID=CPT-T1_058.gene;
+AY216660.2	GbkToGff	mRNA	41606	41945	.	+	.	locus_tag=CPT-T1_058;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_058.mRNA;Parent=CPT-T1_058.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	41606	41610	.	+	.	locus_tag=CPT-T1_058;regulatory_class=ribosome_binding_site;ID=CPT-T1_058.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_058.mRNA;
+AY216660.2	GbkToGff	CDS	41619	41945	.	+	0	locus_tag=CPT-T1_058;codon_start=1;transl_table=11;product=hypothetical protein;translation=MFNIKPLTEAEKQAQAKQTENIQVIADALIGKRSIKINLDTVGQSFFTKGLDKYVINVKARDLVARIQKLNNQKLKLIKVEGNMCEIENLSAPDPNKWEITDVEFIVE;note=Orf no. 76 see PMID: 14972552;ID=CPT-T1_058.CDS.1;Parent=CPT-T1_058.mRNA;
+AY216660.2	GbkToGff	gene	42013	42254	.	+	.	locus_tag=CPT-T1_059;ID=CPT-T1_059.gene;
+AY216660.2	GbkToGff	mRNA	42013	42254	.	+	.	locus_tag=CPT-T1_059;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_059.mRNA;Parent=CPT-T1_059.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	42013	42016	.	+	.	locus_tag=CPT-T1_059;regulatory_class=ribosome_binding_site;ID=CPT-T1_059.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_059.mRNA;
+AY216660.2	GbkToGff	CDS	42024	42254	.	+	0	locus_tag=CPT-T1_059;codon_start=1;transl_table=11;product=hypothetical protein;translation=MSIVKNQQAIDSTNNNRFAIFITRDNKRFAVKAVPGGYKTYMEDNGKWVRCDNLANFLVWNADLQGFDDISTLIEE;note=Orf no. 75 see PMID: 14972552;ID=CPT-T1_059.CDS.1;Parent=CPT-T1_059.mRNA;
+AY216660.2	GbkToGff	gene	42247	42487	.	+	.	locus_tag=CPT-T1_060;ID=CPT-T1_060.gene;
+AY216660.2	GbkToGff	mRNA	42247	42487	.	+	.	locus_tag=CPT-T1_060;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_060.mRNA;Parent=CPT-T1_060.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	42247	42251	.	+	.	locus_tag=CPT-T1_060;regulatory_class=ribosome_binding_site;ID=CPT-T1_060.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_060.mRNA;
+AY216660.2	GbkToGff	CDS	42260	42487	.	+	0	locus_tag=CPT-T1_060;codon_start=1;transl_table=11;product=hypothetical protein;translation=MPRYSNLTQLTRVNGHMIPAKSTHYAMGAKHGLYFKWRGQWNFTVIRNFYIRVTGDDPQSVVENSIGDNKIEVLK;note=Orf no. 74 see PMID: 14972552;ID=CPT-T1_060.CDS.1;Parent=CPT-T1_060.mRNA;
+AY216660.2	GbkToGff	gene	42473	42594	.	+	.	locus_tag=CPT-T1_061;ID=CPT-T1_061.gene;
+AY216660.2	GbkToGff	mRNA	42473	42594	.	+	.	locus_tag=CPT-T1_061;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_061.mRNA;Parent=CPT-T1_061.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	42473	42477	.	+	.	locus_tag=CPT-T1_061;regulatory_class=ribosome_binding_site;ID=CPT-T1_061.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_061.mRNA;
+AY216660.2	GbkToGff	CDS	42484	42594	.	+	0	locus_tag=CPT-T1_061;codon_start=1;transl_table=11;product=hypothetical protein;translation=MNFNIIAFWSAVWFFCVGHVVVGIVIMLLLCAGAFE;note=single transmembrane domain predicted N-in and C-out%3BOrf no. 73 see PMID: 14972552;ID=CPT-T1_061.CDS.1;Parent=CPT-T1_061.mRNA;
+AY216660.2	GbkToGff	gene	42580	42764	.	+	.	locus_tag=CPT-T1_062;ID=CPT-T1_062.gene;
+AY216660.2	GbkToGff	mRNA	42580	42764	.	+	.	locus_tag=CPT-T1_062;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_062.mRNA;Parent=CPT-T1_062.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	42580	42583	.	+	.	locus_tag=CPT-T1_062;regulatory_class=ribosome_binding_site;ID=CPT-T1_062.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_062.mRNA;
+AY216660.2	GbkToGff	CDS	42591	42764	.	+	0	locus_tag=CPT-T1_062;codon_start=1;transl_table=11;product=hypothetical protein;translation=MMRILICMMAAVAMAILVVSGCGEARDSCHETGSQVTTFVMVGNVLLPITSNEITCE;note=Orf no. 72 see PMID: 14972552;ID=CPT-T1_062.CDS.1;Parent=CPT-T1_062.mRNA;
+AY216660.2	GbkToGff	gene	42824	43324	.	+	.	locus_tag=CPT-T1_063;ID=CPT-T1_063.gene;
+AY216660.2	GbkToGff	mRNA	42824	43324	.	+	.	locus_tag=CPT-T1_063;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_063.mRNA;Parent=CPT-T1_063.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	42824	42827	.	+	.	locus_tag=CPT-T1_063;regulatory_class=ribosome_binding_site;ID=CPT-T1_063.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_063.mRNA;
+AY216660.2	GbkToGff	CDS	42836	43324	.	+	0	locus_tag=CPT-T1_063;codon_start=1;transl_table=11;product=hypothetical protein;translation=MKIKLLSNGGYKGFTRDLEADPIVVDAVKCDSSTGGYRVKVDDLVKAGVYDLDYGLSVSPVFGPADFNEKDGTMFFFDWEVKANIKPRKVRLLSNGGYPMRPGYENRTFPVIVDFIGTTDNLVYVSHEQLKAVGFVGGMNKEALCFFHRCPEPIGIECELVY;note=Orf no. 71 see PMID: 14972552;ID=CPT-T1_063.CDS.1;Parent=CPT-T1_063.mRNA;
+AY216660.2	GbkToGff	gene	43385	43881	.	+	.	locus_tag=CPT-T1_064;ID=CPT-T1_064.gene;
+AY216660.2	GbkToGff	mRNA	43385	43881	.	+	.	locus_tag=CPT-T1_064;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_064.mRNA;Parent=CPT-T1_064.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	43385	43388	.	+	.	locus_tag=CPT-T1_064;regulatory_class=ribosome_binding_site;ID=CPT-T1_064.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_064.mRNA;
+AY216660.2	GbkToGff	CDS	43396	43881	.	+	0	locus_tag=CPT-T1_064;codon_start=1;transl_table=11;product=hypothetical protein;translation=MLKLKDIQFPVVFNTISCGKITCHSKDRATDSSFNECHPSIVGNLIELHNNHNPDNIPSLPYYVEGVGPGWKVGRSIFHAAKPEIKPALQCTQIENMPLSATLKGVQLDSESWIEITATPKTIEVHDDVVILLLHYGSFKHKTVSGEISIKRGTLVRYEVK;note=Orf no. 70 see PMID: 14972552;ID=CPT-T1_064.CDS.1;Parent=CPT-T1_064.mRNA;
+AY216660.2	GbkToGff	gene	43870	44030	.	+	.	locus_tag=CPT-T1_065;ID=CPT-T1_065.gene;
+AY216660.2	GbkToGff	mRNA	43870	44030	.	+	.	locus_tag=CPT-T1_065;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_065.mRNA;Parent=CPT-T1_065.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	43870	43874	.	+	.	locus_tag=CPT-T1_065;regulatory_class=ribosome_binding_site;ID=CPT-T1_065.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_065.mRNA;
+AY216660.2	GbkToGff	CDS	43881	44030	.	+	0	locus_tag=CPT-T1_065;codon_start=1;transl_table=11;product=hypothetical protein;translation=MTAWVLIILMSKGPDHVYMESQQSCNKAREVIAENKPFGYEVKTMCVKR;note=Orf no. 69 see PMID: 14972552;ID=CPT-T1_065.CDS.1;Parent=CPT-T1_065.mRNA;
+AY216660.2	GbkToGff	gene	44098	44485	.	+	.	locus_tag=CPT-T1_066;ID=CPT-T1_066.gene;
+AY216660.2	GbkToGff	mRNA	44098	44485	.	+	.	locus_tag=CPT-T1_066;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_066.mRNA;Parent=CPT-T1_066.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	44098	44103	.	+	.	locus_tag=CPT-T1_066;regulatory_class=ribosome_binding_site;ID=CPT-T1_066.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_066.mRNA;
+AY216660.2	GbkToGff	CDS	44111	44485	.	+	0	locus_tag=CPT-T1_066;codon_start=1;transl_table=11;product=hypothetical protein;translation=MKFECISDNTKKFTVGKIYDVPTEHAEQTVALTDDTGRNRIATVTHNGEGLRWNSGGTKFATFGKKRKRTFRVNGNVAANKIHNVKPSEVDRKPALKFKEKVDLFNLAASLVLLVAAISLLSIM;note=single transmembrane domain predicted N-in and C-out%3B Orf no. 68 see PMID: 14972552;ID=CPT-T1_066.CDS.1;Parent=CPT-T1_066.mRNA;
+AY216660.2	GbkToGff	regulatory	44493	44526	.	+	.	regulatory_class=terminator%2C rho-independent;ID=AY216660.2.regulatory.16;
+AY216660.2	GbkToGff	gene	44527	44649	.	+	.	locus_tag=CPT-T1_067;ID=CPT-T1_067.gene;
+AY216660.2	GbkToGff	mRNA	44527	44649	.	+	.	locus_tag=CPT-T1_067;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_067.mRNA;Parent=CPT-T1_067.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	44527	44530	.	+	.	locus_tag=CPT-T1_067;regulatory_class=ribosome_binding_site;ID=CPT-T1_067.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_067.mRNA;
+AY216660.2	GbkToGff	CDS	44539	44649	.	+	0	locus_tag=CPT-T1_067;codon_start=1;transl_table=11;product=hypothetical protein;translation=MPDFSNWNNEPPSFQELLFCLLVLTLSLKGVLWLLS;note=single transmembrane domain predicted N-out and C-in%3B Orf no. 67 see PMID: 14972552;ID=CPT-T1_067.CDS.1;Parent=CPT-T1_067.mRNA;
+AY216660.2	GbkToGff	gene	44625	44852	.	+	.	locus_tag=CPT-T1_068;ID=CPT-T1_068.gene;
+AY216660.2	GbkToGff	mRNA	44625	44852	.	+	.	locus_tag=CPT-T1_068;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_068.mRNA;Parent=CPT-T1_068.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	44625	44628	.	+	.	locus_tag=CPT-T1_068;regulatory_class=ribosome_binding_site;ID=CPT-T1_068.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_068.mRNA;
+AY216660.2	GbkToGff	CDS	44634	44852	.	+	0	locus_tag=CPT-T1_068;codon_start=1;transl_table=11;product=hypothetical protein;translation=MATIMTVEDAARDAVEGMRPNTSRIAHYYKSEVSAVQLVHEILRLPQVDSARVVTCLKNYFCITIKTNSTNC;note=Orf no. 66 see PMID: 14972552;ID=CPT-T1_068.CDS.1;Parent=CPT-T1_068.mRNA;
+AY216660.2	GbkToGff	gene	44898	45356	.	+	.	locus_tag=CPT-T1_069;ID=CPT-T1_069.gene;
+AY216660.2	GbkToGff	mRNA	44898	45356	.	+	.	locus_tag=CPT-T1_069;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_069.mRNA;Parent=CPT-T1_069.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	44898	44901	.	+	.	locus_tag=CPT-T1_069;regulatory_class=ribosome_binding_site;ID=CPT-T1_069.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_069.mRNA;
+AY216660.2	GbkToGff	CDS	44910	45356	.	+	0	locus_tag=CPT-T1_069;codon_start=1;transl_table=11;product=hypothetical protein;translation=MKHLICIEAPNDQYTLHGLGVFKGHYITAGTYDARRGDGDLMITSKEVNPYIMQNLGNNEYMAYGCNAVYKHVKIRKRVVRAFKKIAIKYWKMSKKDAGRWARNVADSYFYRNGESCYFLIDELMENYGGDFSQGSFDDWANYEISCW;note=Orf no. 65 see PMID: 14972552;ID=CPT-T1_069.CDS.1;Parent=CPT-T1_069.mRNA;
+AY216660.2	GbkToGff	gene	45428	45970	.	+	.	locus_tag=CPT-T1_070;ID=CPT-T1_070.gene;
+AY216660.2	GbkToGff	mRNA	45428	45970	.	+	.	locus_tag=CPT-T1_070;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_070.mRNA;Parent=CPT-T1_070.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	45428	45431	.	+	.	locus_tag=CPT-T1_070;regulatory_class=ribosome_binding_site;ID=CPT-T1_070.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_070.mRNA;
+AY216660.2	GbkToGff	CDS	45440	45970	.	+	0	locus_tag=CPT-T1_070;note=HHPred 99 probability structural alignment to phage T4 polynucleotide kinase Protein Data bank entry 5UJ0 over most of protein%3B Orf no. 64 see PMID: 14972552%3B InterPro domain IPR023214;codon_start=1;transl_table=11;product=polynucleotide kinase PnkP;translation=MDKITIWGQTINLFLGTRRVAIFDFDGTLSDGSGRLHLLPTKDLHLTESWSEFNRAAIFDNPIQSTIDVMNSMFAAGYHVIILTGRSDEVRYASELWLKHHGARYDYLVMRPHTDNRKDTVMKEEAVRAIGIDNILAAWDDSVNIIKKFRDLGITTYQVCEYACDSREDLNSHGVD;ID=CPT-T1_070.CDS.1;Parent=CPT-T1_070.mRNA;
+AY216660.2	GbkToGff	gene	45941	46451	.	+	.	locus_tag=CPT-T1_071;ID=CPT-T1_071.gene;
+AY216660.2	GbkToGff	mRNA	45941	46451	.	+	.	locus_tag=CPT-T1_071;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_071.mRNA;Parent=CPT-T1_071.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	45941	45944	.	+	.	locus_tag=CPT-T1_071;regulatory_class=ribosome_binding_site;ID=CPT-T1_071.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_071.mRNA;
+AY216660.2	GbkToGff	CDS	45957	46451	.	+	0	locus_tag=CPT-T1_071;note=InterPro domains IPR001471%2C IPR003615%2C and IPR016177%3B similar to phage T7 protein 3.8%3B alternative in-frame start site proposed as T1 p63a%3B Orf no. 63 see PMID: 14972552;codon_start=1;transl_table=11;product=HNH endonuclease;translation=MVSIDNKSMVRELFTYSDGVLYWKAKSSKYSRAKIGGAAGSKDKDGYIIIRVRNETRGAHRLVWIYHNGKIPDGMEVDHMDGDITNNRIENLRLVTRTINNRNQKKRSDNTTGVSGVTFMKDRGKYRAQVRNKRLGQFDTIEEAAKAVKDERDRLGLFTKRHGV;ID=CPT-T1_071.CDS.1;Parent=CPT-T1_071.mRNA;
+AY216660.2	GbkToGff	gene	46444	47026	.	+	.	locus_tag=CPT-T1_072;ID=CPT-T1_072.gene;
+AY216660.2	GbkToGff	mRNA	46444	47026	.	+	.	locus_tag=CPT-T1_072;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_072.mRNA;Parent=CPT-T1_072.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	46444	46447	.	+	.	locus_tag=CPT-T1_072;regulatory_class=ribosome_binding_site;ID=CPT-T1_072.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_072.mRNA;
+AY216660.2	GbkToGff	CDS	46454	47026	.	+	0	locus_tag=CPT-T1_072;note=InterPro domain IPR027417%3B HHPred predicted structural similarity at 99%25 probability to phage T4 DNK Protein Data Bank Entry 1DEK%3B Orf no. 62  see PMID: 14972552;codon_start=1;transl_table=11;product=deoxynucleotide kinase;translation=MKTAIILNGAPGAGKDTIGCILADTYDHVALRSFKAPMFEIARAILGETNFEYFMFLYEDRRYKEEPASILNGKSPRQFMIWISEEVIKPQFGNRFFGMRAESKVKESHSLSVFTDGGFKDEILQMIEGDIQVKLCRIHRNGCNFDNDSRDYIYLDDMIGVNGYQECDFFSVEGHPEITAQHIAATFINK;ID=CPT-T1_072.CDS.1;Parent=CPT-T1_072.mRNA;
+AY216660.2	GbkToGff	gene	47085	47308	.	+	.	locus_tag=CPT-T1_073;ID=CPT-T1_073.gene;
+AY216660.2	GbkToGff	mRNA	47085	47308	.	+	.	locus_tag=CPT-T1_073;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_073.mRNA;Parent=CPT-T1_073.gene;
+AY216660.2	GbkToGff	CDS	47099	47308	.	+	0	locus_tag=CPT-T1_073;codon_start=1;transl_table=11;product=hypothetical protein;translation=MMVSTDKFFTCTKTSEVFELVHTDNGDFMHDGCDAFIEVKESDYDDGVYYNPAVNTQFFTPIEEEGEEA;note=Orf no. 61 see PMID: 14972552;ID=CPT-T1_073.CDS.1;Parent=CPT-T1_073.mRNA;
+AY216660.2	GbkToGff	gene	47293	47649	.	+	.	locus_tag=CPT-T1_074;ID=CPT-T1_074.gene;
+AY216660.2	GbkToGff	mRNA	47293	47649	.	+	.	locus_tag=CPT-T1_074;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_074.mRNA;Parent=CPT-T1_074.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	47293	47297	.	+	.	locus_tag=CPT-T1_074;regulatory_class=ribosome_binding_site;ID=CPT-T1_074.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_074.mRNA;
+AY216660.2	GbkToGff	CDS	47305	47649	.	+	0	locus_tag=CPT-T1_074;codon_start=1;transl_table=11;product=hypothetical protein;translation=MITINLSDKQAREILDTIGEQLHVKGDTAEILNQIERQLTPVSTNQAEFAAWKSERILPNIIKAWKRKHKKEINVEDLFTDELSPSNVAQYQLRYMESVCNQVLGVSFSFKGDK;note=Orf no. 60 see PMID: 14972552;ID=CPT-T1_074.CDS.1;Parent=CPT-T1_074.mRNA;
+AY216660.2	GbkToGff	gene	47637	47879	.	+	.	locus_tag=CPT-T1_075;ID=CPT-T1_075.gene;
+AY216660.2	GbkToGff	mRNA	47637	47879	.	+	.	locus_tag=CPT-T1_075;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_075.mRNA;Parent=CPT-T1_075.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	47637	47640	.	+	.	locus_tag=CPT-T1_075;regulatory_class=ribosome_binding_site;ID=CPT-T1_075.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_075.mRNA;
+AY216660.2	GbkToGff	CDS	47649	47879	.	+	0	locus_tag=CPT-T1_075;codon_start=1;transl_table=11;product=hypothetical protein;translation=MFGLSEAEWNVVKRAAKELNKFVSGMKKEDRKNDKIMIDVISTHHKKVELLIDRYKFVWTAGYIAGRVGNKEGDYE;note=Orf no. 59 see PMID: 14972552;ID=CPT-T1_075.CDS.1;Parent=CPT-T1_075.mRNA;
+AY216660.2	GbkToGff	gene	47864	48082	.	+	.	locus_tag=CPT-T1_076;ID=CPT-T1_076.gene;
+AY216660.2	GbkToGff	mRNA	47864	48082	.	+	.	locus_tag=CPT-T1_076;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_076.mRNA;Parent=CPT-T1_076.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	47864	47868	.	+	.	locus_tag=CPT-T1_076;regulatory_class=ribosome_binding_site;ID=CPT-T1_076.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_076.mRNA;
+AY216660.2	GbkToGff	CDS	47879	48082	.	+	0	locus_tag=CPT-T1_076;codon_start=1;transl_table=11;product=hypothetical protein;translation=MANLPKKGDQVRCVTSRNGNALSAGCLYDVEKVSKSKRLVFVYGDDGNLHEIDYPQDVTNGQFEIND;note=Orf no. 58 see PMID: 14972552;ID=CPT-T1_076.CDS.1;Parent=CPT-T1_076.mRNA;
+AY216660.2	GbkToGff	gene	48235	48408	.	+	.	locus_tag=CPT-T1_077;ID=CPT-T1_077.gene;
+AY216660.2	GbkToGff	mRNA	48235	48408	.	+	.	locus_tag=CPT-T1_077;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_077.mRNA;Parent=CPT-T1_077.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	48235	48238	.	+	.	locus_tag=CPT-T1_077;regulatory_class=ribosome_binding_site;ID=CPT-T1_077.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_077.mRNA;
+AY216660.2	GbkToGff	CDS	48247	48408	.	+	0	locus_tag=CPT-T1_077;codon_start=1;transl_table=11;product=hypothetical protein;translation=MQKTKDESVKIEIKVTRNGETTRYKKRLNPGEAVIGRIAGVMIKAQEDEAIQS;note=Orf no. 57 see PMID: 14972552;ID=CPT-T1_077.CDS.1;Parent=CPT-T1_077.mRNA;
+AY216660.2	GbkToGff	gene	48377	48574	.	+	.	locus_tag=CPT-T1_078;ID=CPT-T1_078.gene;
+AY216660.2	GbkToGff	mRNA	48377	48574	.	+	.	locus_tag=CPT-T1_078;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_078.mRNA;Parent=CPT-T1_078.gene;
+AY216660.2	GbkToGff	CDS	48389	48574	.	+	0	locus_tag=CPT-T1_078;codon_start=1;transl_table=11;product=hypothetical protein;translation=MKRFKVKLIIRKMGMFCQSCKQSFEAELSATSQDEAITKAKKLSGANLDTHKINIELIKEI;note=alternative start codon to Orf no. 56 see PMID: 14972552;ID=CPT-T1_078.CDS.1;Parent=CPT-T1_078.mRNA;
+AY216660.2	GbkToGff	gene	48564	48803	.	+	.	locus_tag=CPT-T1_079;ID=CPT-T1_079.gene;
+AY216660.2	GbkToGff	mRNA	48564	48803	.	+	.	locus_tag=CPT-T1_079;Notes=mRNA feature automatically generated by Gbk to GFF conversion;ID=CPT-T1_079.mRNA;Parent=CPT-T1_079.gene;
+AY216660.2	GbkToGff	Shine_Dalgarno_seqeunce	48564	48568	.	+	.	locus_tag=CPT-T1_079;regulatory_class=ribosome_binding_site;ID=CPT-T1_079.Shine_Dalgarno_seqeunce.1;Parent=CPT-T1_079.mRNA;
+AY216660.2	GbkToGff	CDS	48576	48803	.	+	0	locus_tag=CPT-T1_079;codon_start=1;transl_table=11;product=hypothetical protein;translation=MTIFLLIIAGVIIFGAGLFAGFALVAAAIAMDAKDKTGVWLTYSPKKDQWEMTGDLAHCYSKAKTHPKGIKRRLS;note=single transmembrane domain predicted N-out and C-in%3B Orf no. 55 see PMID: 14972552;ID=CPT-T1_079.CDS.1;Parent=CPT-T1_079.mRNA;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_phageqc_annotation/test-data/PhageQC_Out.gff3	Fri Jun 17 13:00:50 2022 +0000
@@ -0,0 +1,185 @@
+##gff-version 3
+AY216660.2	feature	gene	25	34	.	+	.	note=Missing RBS;ID=CPT-T1_001.gene;Name=
+AY216660.2	feature	gene	574	583	.	+	.	note=Missing RBS;ID=CPT-T1_002.gene;Name=
+AY216660.2	feature	gene	2215	2224	.	+	.	note=Missing RBS;ID=CPT-T1_003.gene;Name=
+AY216660.2	feature	gene	3481	3490	.	+	.	note=Missing RBS;ID=CPT-T1_004.gene;Name=
+AY216660.2	feature	gene	4249	4258	.	+	.	note=Missing RBS;ID=CPT-T1_005.gene;Name=
+AY216660.2	feature	gene	5374	5383	.	+	.	note=Missing RBS;ID=CPT-T1_006.gene;Name=
+AY216660.2	feature	gene	5911	5920	.	+	.	note=Missing RBS;ID=CPT-T1_007.gene;Name=
+AY216660.2	feature	gene	6769	6778	.	+	.	note=Missing RBS;ID=CPT-T1_008.gene;Name=
+AY216660.2	feature	gene	7779	7788	.	+	.	note=Missing RBS;ID=CPT-T1_009.gene;Name=
+AY216660.2	feature	gene	8112	8121	.	+	.	note=Missing RBS;ID=CPT-T1_010.gene;Name=
+AY216660.2	feature	gene	8519	8528	.	+	.	note=Missing RBS;ID=CPT-T1_011.gene;Name=
+AY216660.2	feature	gene	8882	8891	.	+	.	note=Missing RBS;ID=CPT-T1_012.gene;Name=
+AY216660.2	feature	gene	9317	9326	.	+	.	note=Missing RBS;ID=CPT-T1_013.gene;Name=
+AY216660.2	feature	gene	9718	9727	.	+	.	note=Missing RBS;ID=CPT-T1_014.gene;Name=
+AY216660.2	feature	gene	10501	10510	.	+	.	note=Missing RBS;ID=CPT-T1_015.gene;Name=
+AY216660.2	feature	gene	10501	10510	.	+	.	note=Missing RBS;ID=CPT-T1_016.gene;Name=
+AY216660.2	feature	gene	11177	11186	.	+	.	note=Missing RBS;ID=CPT-T1_017.gene;Name=
+AY216660.2	feature	gene	14052	14061	.	+	.	note=Missing RBS;ID=CPT-T1_018.gene;Name=
+AY216660.2	feature	gene	14485	14494	.	+	.	note=Missing RBS;ID=CPT-T1_019.gene;Name=
+AY216660.2	feature	gene	15264	15273	.	+	.	note=Missing RBS;ID=CPT-T1_020.gene;Name=
+AY216660.2	feature	gene	15994	16003	.	+	.	note=Missing RBS;ID=CPT-T1_021.gene;Name=
+AY216660.2	feature	gene	16673	16682	.	+	.	note=Missing RBS;ID=CPT-T1_022.gene;Name=
+AY216660.2	feature	gene	20236	20245	.	+	.	note=Missing RBS;ID=CPT-T1_023.gene;Name=
+AY216660.2	feature	gene	20541	20550	.	+	.	note=Missing RBS;ID=CPT-T1_024.gene;Name=
+AY216660.2	feature	gene	21524	21533	.	-	.	note=Missing RBS;ID=CPT-T1_025.gene;Name=
+AY216660.2	feature	gene	21740	21749	.	+	.	note=Missing RBS;ID=CPT-T1_026.gene;Name=
+AY216660.2	feature	gene	22005	22014	.	+	.	note=Missing RBS;ID=CPT-T1_027.gene;Name=
+AY216660.2	feature	gene	23114	23123	.	+	.	note=Missing RBS;ID=CPT-T1_028.gene;Name=
+AY216660.2	feature	gene	23839	23848	.	+	.	note=Missing RBS;ID=CPT-T1_029.gene;Name=
+AY216660.2	feature	gene	26556	26565	.	-	.	note=Missing RBS;ID=CPT-T1_030.gene;Name=
+AY216660.2	feature	gene	26606	26615	.	-	.	note=Missing RBS;ID=CPT-T1_031.gene;Name=
+AY216660.2	feature	gene	27591	27600	.	-	.	note=Missing RBS;ID=CPT-T1_032.gene;Name=
+AY216660.2	feature	gene	28104	28113	.	-	.	note=Missing RBS;ID=CPT-T1_033.gene;Name=
+AY216660.2	feature	gene	28153	28162	.	+	.	note=Missing RBS;ID=CPT-T1_034.gene;Name=
+AY216660.2	feature	gene	30166	30175	.	+	.	note=Missing RBS;ID=CPT-T1_035.gene;Name=
+AY216660.2	feature	gene	30654	30663	.	+	.	note=Missing RBS;ID=CPT-T1_036.gene;Name=
+AY216660.2	feature	gene	31362	31371	.	+	.	note=Missing RBS;ID=CPT-T1_037.gene;Name=
+AY216660.2	feature	gene	31680	31689	.	+	.	note=Missing RBS;ID=CPT-T1_038.gene;Name=
+AY216660.2	feature	gene	31866	31875	.	+	.	note=Missing RBS;ID=CPT-T1_039.gene;Name=
+AY216660.2	feature	gene	32084	32093	.	+	.	note=Missing RBS;ID=CPT-T1_040.gene;Name=
+AY216660.2	feature	gene	32452	32461	.	+	.	note=Missing RBS;ID=CPT-T1_041.gene;Name=
+AY216660.2	feature	gene	33658	33667	.	+	.	note=Missing RBS;ID=CPT-T1_042.gene;Name=
+AY216660.2	feature	gene	34211	34220	.	+	.	note=Missing RBS;ID=CPT-T1_043.gene;Name=
+AY216660.2	feature	gene	34510	34519	.	+	.	note=Missing RBS;ID=CPT-T1_044.gene;Name=
+AY216660.2	feature	gene	34724	34733	.	+	.	note=Missing RBS;ID=CPT-T1_045.gene;Name=
+AY216660.2	feature	gene	35212	35221	.	+	.	note=Missing RBS;ID=CPT-T1_046.gene;Name=
+AY216660.2	feature	gene	36185	36194	.	-	.	note=Missing RBS;ID=CPT-T1_047.gene;Name=
+AY216660.2	feature	gene	37761	37770	.	-	.	note=Missing RBS;ID=CPT-T1_048.gene;Name=
+AY216660.2	feature	gene	38300	38309	.	-	.	note=Missing RBS;ID=CPT-T1_049.gene;Name=
+AY216660.2	feature	gene	38712	38721	.	-	.	note=Missing RBS;ID=CPT-T1_050.gene;Name=
+AY216660.2	feature	gene	39000	39009	.	-	.	note=Missing RBS;ID=CPT-T1_051.gene;Name=
+AY216660.2	feature	gene	39228	39237	.	-	.	note=Missing RBS;ID=CPT-T1_052.gene;Name=
+AY216660.2	feature	gene	39450	39459	.	-	.	note=Missing RBS;ID=CPT-T1_053.gene;Name=
+AY216660.2	feature	gene	39764	39773	.	-	.	note=Missing RBS;ID=CPT-T1_054.gene;Name=
+AY216660.2	feature	gene	39984	39993	.	-	.	note=Missing RBS;ID=CPT-T1_055.gene;Name=
+AY216660.2	feature	gene	40348	40357	.	-	.	note=Missing RBS;ID=CPT-T1_056.gene;Name=
+AY216660.2	feature	gene	41005	41014	.	+	.	note=Missing RBS;ID=CPT-T1_057.gene;Name=
+AY216660.2	feature	gene	41591	41600	.	+	.	note=Missing RBS;ID=CPT-T1_058.gene;Name=
+AY216660.2	feature	gene	41998	42007	.	+	.	note=Missing RBS;ID=CPT-T1_059.gene;Name=
+AY216660.2	feature	gene	42232	42241	.	+	.	note=Missing RBS;ID=CPT-T1_060.gene;Name=
+AY216660.2	feature	gene	42458	42467	.	+	.	note=Missing RBS;ID=CPT-T1_061.gene;Name=
+AY216660.2	feature	gene	42565	42574	.	+	.	note=Missing RBS;ID=CPT-T1_062.gene;Name=
+AY216660.2	feature	gene	42809	42818	.	+	.	note=Missing RBS;ID=CPT-T1_063.gene;Name=
+AY216660.2	feature	gene	43370	43379	.	+	.	note=Missing RBS;ID=CPT-T1_064.gene;Name=
+AY216660.2	feature	gene	43855	43864	.	+	.	note=Missing RBS;ID=CPT-T1_065.gene;Name=
+AY216660.2	feature	gene	44083	44092	.	+	.	note=Missing RBS;ID=CPT-T1_066.gene;Name=
+AY216660.2	feature	gene	44512	44521	.	+	.	note=Missing RBS;ID=CPT-T1_067.gene;Name=
+AY216660.2	feature	gene	44610	44619	.	+	.	note=Missing RBS;ID=CPT-T1_068.gene;Name=
+AY216660.2	feature	gene	44883	44892	.	+	.	note=Missing RBS;ID=CPT-T1_069.gene;Name=
+AY216660.2	feature	gene	45413	45422	.	+	.	note=Missing RBS;ID=CPT-T1_070.gene;Name=
+AY216660.2	feature	gene	45926	45935	.	+	.	note=Missing RBS;ID=CPT-T1_071.gene;Name=
+AY216660.2	feature	gene	46429	46438	.	+	.	note=Missing RBS;ID=CPT-T1_072.gene;Name=
+AY216660.2	feature	gene	47070	47079	.	+	.	note=Missing RBS;ID=CPT-T1_073.gene;Name=
+AY216660.2	feature	gene	47278	47287	.	+	.	note=Missing RBS;ID=CPT-T1_074.gene;Name=
+AY216660.2	feature	gene	47622	47631	.	+	.	note=Missing RBS;ID=CPT-T1_075.gene;Name=
+AY216660.2	feature	gene	47849	47858	.	+	.	note=Missing RBS;ID=CPT-T1_076.gene;Name=
+AY216660.2	feature	gene	48220	48229	.	+	.	note=Missing RBS;ID=CPT-T1_077.gene;Name=
+AY216660.2	feature	gene	48362	48371	.	+	.	note=Missing RBS;ID=CPT-T1_078.gene;Name=
+AY216660.2	feature	gene	48549	48558	.	+	.	note=Missing RBS;ID=CPT-T1_079.gene;Name=
+AY216660.2	feature	gene	2185	2238	.	.	.	note=Excessive gap%2C 54 bases;
+AY216660.2	feature	gene	5877	5937	.	.	.	note=Excessive gap%2C 61 bases;
+AY216660.2	feature	gene	6706	6795	.	.	.	note=Excessive gap%2C 90 bases;
+AY216660.2	feature	gene	10415	10527	.	.	.	note=Excessive gap%2C 113 bases;
+AY216660.2	feature	gene	14433	14511	.	.	.	note=Excessive gap%2C 79 bases;
+AY216660.2	feature	gene	14424	14540	.	+	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	14424	14433	.	+	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	14439	14540	.	+	0	Parent=<unknown id>;
+AY216660.2	feature	gene	14447	14451	.	-	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	14452	14461	.	-	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	14447	14524	.	-	0	Parent=<unknown id>;
+AY216660.2	feature	gene	16622	16698	.	.	.	note=Excessive gap%2C 77 bases;
+AY216660.2	feature	gene	21507	21765	.	.	.	note=Excessive gap%2C 259 bases;
+AY216660.2	feature	gene	21907	22033	.	.	.	note=Excessive gap%2C 127 bases;
+AY216660.2	feature	gene	24290	24350	.	.	.	note=Excessive gap%2C 61 bases;
+AY216660.2	feature	gene	24286	24390	.	+	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	24286	24295	.	+	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	24301	24390	.	+	0	Parent=<unknown id>;
+AY216660.2	feature	gene	27574	27634	.	.	.	note=Excessive gap%2C 61 bases;
+AY216660.2	feature	gene	28088	28180	.	.	.	note=Excessive gap%2C 93 bases;
+AY216660.2	feature	gene	30613	30679	.	.	.	note=Excessive gap%2C 67 bases;
+AY216660.2	feature	gene	30629	30718	.	+	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	30629	30638	.	+	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	30644	30718	.	+	0	Parent=<unknown id>;
+AY216660.2	feature	gene	31642	31707	.	.	.	note=Excessive gap%2C 66 bases;
+AY216660.2	feature	gene	32400	32477	.	.	.	note=Excessive gap%2C 78 bases;
+AY216660.2	feature	gene	32385	32492	.	+	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	32385	32394	.	+	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	32400	32492	.	+	0	Parent=<unknown id>;
+AY216660.2	feature	gene	33612	33683	.	.	.	note=Excessive gap%2C 72 bases;
+AY216660.2	feature	gene	34167	34238	.	.	.	note=Excessive gap%2C 72 bases;
+AY216660.2	feature	gene	34416	34533	.	.	.	note=Excessive gap%2C 118 bases;
+AY216660.2	feature	gene	35639	35760	.	.	.	note=Excessive gap%2C 122 bases;
+AY216660.2	feature	gene	35674	35678	.	-	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	35679	35688	.	-	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	35674	35754	.	-	0	Parent=<unknown id>;
+AY216660.2	feature	gene	35674	35678	.	-	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	35679	35688	.	-	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	35674	35766	.	-	0	Parent=<unknown id>;
+AY216660.2	feature	gene	38694	38773	.	.	.	note=Excessive gap%2C 80 bases;
+AY216660.2	feature	gene	38719	38814	.	+	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	38719	38728	.	+	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	38734	38814	.	+	0	Parent=<unknown id>;
+AY216660.2	feature	gene	38725	38814	.	+	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	38725	38734	.	+	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	38740	38814	.	+	0	Parent=<unknown id>;
+AY216660.2	feature	gene	39212	39288	.	.	.	note=Excessive gap%2C 77 bases;
+AY216660.2	feature	gene	40331	41030	.	.	.	note=Excessive gap%2C 700 bases;
+AY216660.2	feature	gene	40416	40420	.	-	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	40421	40430	.	-	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	40416	40496	.	-	0	Parent=<unknown id>;
+AY216660.2	feature	gene	40416	40420	.	-	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	40421	40430	.	-	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	40416	40511	.	-	0	Parent=<unknown id>;
+AY216660.2	feature	gene	40416	40420	.	-	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	40421	40430	.	-	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	40416	40541	.	-	0	Parent=<unknown id>;
+AY216660.2	feature	gene	40416	40420	.	-	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	40421	40430	.	-	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	40416	40559	.	-	0	Parent=<unknown id>;
+AY216660.2	feature	gene	40416	40420	.	-	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	40421	40430	.	-	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	40416	40595	.	-	0	Parent=<unknown id>;
+AY216660.2	feature	gene	40416	40420	.	-	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	40421	40430	.	-	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	40416	40637	.	-	0	Parent=<unknown id>;
+AY216660.2	feature	gene	40817	40921	.	+	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	40817	40826	.	+	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	40832	40921	.	+	0	Parent=<unknown id>;
+AY216660.2	feature	gene	41946	42023	.	.	.	note=Excessive gap%2C 78 bases;
+AY216660.2	feature	gene	42765	42835	.	.	.	note=Excessive gap%2C 71 bases;
+AY216660.2	feature	gene	43325	43395	.	.	.	note=Excessive gap%2C 71 bases;
+AY216660.2	feature	gene	44031	44110	.	.	.	note=Excessive gap%2C 80 bases;
+AY216660.2	feature	gene	44486	44538	.	.	.	note=Excessive gap%2C 53 bases;
+AY216660.2	feature	gene	44853	44909	.	.	.	note=Excessive gap%2C 57 bases;
+AY216660.2	feature	gene	45357	45439	.	.	.	note=Excessive gap%2C 83 bases;
+AY216660.2	feature	gene	45368	45372	.	-	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	45373	45382	.	-	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	45368	45445	.	-	0	Parent=<unknown id>;
+AY216660.2	feature	gene	45368	45372	.	-	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	45373	45382	.	-	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	45368	45463	.	-	0	Parent=<unknown id>;
+AY216660.2	feature	gene	45368	45372	.	-	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	45373	45382	.	-	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	45368	45466	.	-	0	Parent=<unknown id>;
+AY216660.2	feature	gene	45368	45372	.	-	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	45373	45382	.	-	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	45368	45475	.	-	0	Parent=<unknown id>;
+AY216660.2	feature	gene	45368	45372	.	-	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	45373	45382	.	-	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	45368	45478	.	-	0	Parent=<unknown id>;
+AY216660.2	feature	gene	45368	45372	.	-	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	45373	45382	.	-	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	45368	45481	.	-	0	Parent=<unknown id>;
+AY216660.2	feature	gene	47027	47098	.	.	.	note=Excessive gap%2C 72 bases;
+AY216660.2	feature	gene	48083	48246	.	.	.	note=Excessive gap%2C 164 bases;
+AY216660.2	feature	gene	48086	48090	.	-	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	48091	48100	.	-	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	48086	48169	.	-	0	Parent=<unknown id>;
+AY216660.2	feature	gene	48160	48164	.	-	.	note=Possible gene;
+AY216660.2	feature	Shine_Dalgarno_sequence	48165	48174	.	-	.	Parent=<unknown id>;
+AY216660.2	feature	CDS	48160	48255	.	-	0	Parent=<unknown id>;
+AY216660.2	feature	gene	10528	10844	.	.	.	note=Excessive Overlap;ID=CPT-T1_015.gene;Name=
+AY216660.2	feature	gene	26447	26536	.	.	.	note=Excessive Overlap;ID=CPT-T1_030.gene;Name=
+AY216660.2	feature	gene	32112	32166	.	.	.	note=Excessive Overlap;ID=CPT-T1_039.gene;Name=
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_phageqc_annotation/test-data/PhageQC_Out.html	Fri Jun 17 13:00:50 2022 +0000
@@ -0,0 +1,1142 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <meta http-equiv="X-UA-Compatible" content="IE=edge">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <!-- The above 3 meta tags *must* come first in the head; any other head content must come *after* these tags -->
+    <meta name="description" content="">
+    <meta name="author" content="">
+    <title>Phage QC on AY216660.2 - 76</title>
+    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap.min.css">
+    <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.4/css/bootstrap-theme.min.css">
+
+    <style type="text/css">
+/*
+ * Base structure
+ */
+
+/* Move down content because we have a fixed navbar that is 50px tall */
+body {
+  padding-top: 50px;
+}
+
+h3:before {
+  display: block;
+  content: " ";
+  margin-top: -50px;
+  height: 50px;
+  visibility: hidden;
+}
+
+/*
+ * Global add-ons
+ */
+
+.sub-header {
+  padding-bottom: 10px;
+  border-bottom: 1px solid #eee;
+}
+
+/*
+ * Top navigation
+ * Hide default border to remove 1px line.
+ */
+.navbar-fixed-top {
+  border: 0;
+}
+
+/*
+ * Sidebar
+ */
+
+/* Hide for mobile, show later */
+.sidebar {
+  display: none;
+}
+@media (min-width: 768px) {
+  .sidebar {
+    position: fixed;
+    top: 51px;
+    bottom: 0;
+    left: 0;
+    z-index: 1000;
+    display: block;
+    padding: 20px;
+    overflow-x: hidden;
+    overflow-y: auto; /* Scrollable contents if viewport is shorter than content. */
+    background-color: #f5f5f5;
+    border-right: 1px solid #eee;
+  }
+}
+
+/* Sidebar navigation */
+.nav-sidebar {
+  margin-right: -21px; /* 20px padding + 1px border */
+  margin-bottom: 20px;
+  margin-left: -20px;
+}
+.nav-sidebar > li > a {
+  padding-right: 20px;
+  padding-left: 20px;
+}
+.nav-sidebar > .active > a,
+.nav-sidebar > .active > a:hover,
+.nav-sidebar > .active > a:focus {
+  color: #fff;
+  background-color: #428bca;
+}
+
+
+/*
+ * Main content
+ */
+
+.main {
+  padding: 20px;
+}
+@media (min-width: 768px) {
+  .main {
+    padding-right: 40px;
+    padding-left: 40px;
+  }
+}
+.main .page-header {
+  margin-top: 0;
+}
+
+
+/*
+ * Placeholder dashboard ideas
+ */
+
+.placeholders {
+  margin-bottom: 30px;
+  text-align: center;
+}
+.placeholders h4 {
+  margin-bottom: 0;
+}
+.placeholder {
+  margin-bottom: 20px;
+}
+.placeholder img {
+  display: inline-block;
+  border-radius: 50%;
+}
+
+td.moron {
+    font-size: 150%;
+    padding: 0px;
+    color: gray;
+}
+.strand_emph {
+    text-decoration: underline;
+    color: black;
+}
+
+    </style>
+    <!-- HTML5 shim and Respond.js for IE8 support of HTML5 elements and media queries -->
+    <!--[if lt IE 9]>
+      <script src="//oss.maxcdn.com/html5shiv/3.7.2/html5shiv.min.js"></script>
+      <script src="//oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
+    <![endif]-->
+  </head>
+  <body>
+
+    <nav class="navbar navbar-inverse navbar-fixed-top">
+      <div class="container-fluid">
+        <div class="navbar-header">
+          <a class="navbar-brand" href="#">Phage QC on AY216660.2</a>
+        </div>
+      </div>
+    </nav>
+
+    <div class="container-fluid">
+      <div class="row">
+        <div class="col-sm-3 col-md-2 sidebar">
+          <ul class="nav nav-sidebar">
+            <li><a href="#main"><b>Overview</b></a></li>
+            <li><a href="#bad_gene_starts"><b>Bad Gene Starts</b></a></li>
+            <li><a href="#missing_rbs"><small>Missing RBS</small></a></li>
+            <li><a href="#weird_starts"><small>Unusual Start Codons</small></a></li>
+            <li><a href="#excessive_gaps"><small>Excessive Gaps</small></a></li>
+            <li><a href="#excessive_overlap"><small>Excessive Overlaps</small></a></li>
+            <!--<li><a href="#coding_density"><small>Coding Density</small></a></li>-->
+            <li><a href="#antisense"><b>Antisense Genes</b></a></li>
+            <li><a href="#morons"><small>Possible Morons</small></a></li>
+            <li><a href="#annotations"><b>Annotation Issues</b></a></li>
+            <li><a href="#missing_product"><small>Missing Product Tags</small></a></li>
+          </ul>
+        </div>
+        <div class="col-sm-9 col-sm-offset-3 col-md-10 col-md-offset-2 main" id="main">
+            <div class="jumbotron">
+                <div class="row">
+                    <div class="col-sm-7">
+                        <h1>Phage AY216660.2</h1>
+                        <!--<h2>Score: 76/100</h2>-->
+                    </div>
+                    <!--<div class="col-sm-5">
+                        <table class="table table-striped">
+                            <thead>
+                                <tr>
+                                    <th>Section</th>
+                                    <th>Score</th>
+                                </tr>
+                            </thead>
+                            <tbody>
+                                <tr><td>Missing RBS</td><td>0%</td></tr>
+                                <tr><td>Excessive Gaps</td><td>76%</td></tr>
+                                <tr><td>Excessive Overlap</td><td>76%</td></tr>
+                                <tr><td>Coding Density Score</td><td>99%</td></tr>
+                                <tr><td>Possible Morons</td><td>98%</td></tr>
+                                <tr><td>Missing Product Tags</td><td>100%</td></tr>
+                            </tbody>
+                        </table>
+                    </div>-->
+                </div>
+            </div>
+
+          <h2 class="sub-header" id="bad_gene_starts">Gene Starts</h2>
+          <h3 id="missing_rbs">Genes missing RBS <small>0 / 79</small></h3>
+          <p>The following genes have issues with their RBS.</p>
+          <p>
+            Since you have not annotated any possible RBSs, this does not count off from your overall score.
+          </p>
+          <div class="table-responsive">
+            <table class="table table-striped">
+              <thead>
+                <tr>
+                  <th>Feature Type</th>
+                  <th>ID</th>
+                  <th>Location</th>
+                  <th>Error</th>
+                  <th>Upstream (-15 .. -5)</th>
+                </tr>
+              </thead>
+              <tbody>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_001.gene</td>
+                    <td>40..576 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">taaatgttaa</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_002.gene</td>
+                    <td>589..2184 [1]</td>
+                    <td>Unannotated but valid RBS</td>
+                    <td><span style="font-family:monospace">taacttt AGG </span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_003.gene</td>
+                    <td>2230..3522 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">acggcccttt</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_004.gene</td>
+                    <td>3496..4273 [1]</td>
+                    <td>Unannotated but valid RBS</td>
+                    <td><span style="font-family:monospace">cc GGAG ccgg</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_005.gene</td>
+                    <td>4264..5388 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">tccagacttt</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_006.gene</td>
+                    <td>5389..5876 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">agcaaagtaa</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_007.gene</td>
+                    <td>5926..6705 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">tagtcctttt</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_008.gene</td>
+                    <td>6784..7755 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">ttttttatta</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_009.gene</td>
+                    <td>7794..8093 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">gtcccttttt</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_010.gene</td>
+                    <td>8127..8548 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">agcgcctttt</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_011.gene</td>
+                    <td>8534..8919 [1]</td>
+                    <td>Unannotated but valid RBS</td>
+                    <td><span style="font-family:monospace">atgac AGGA c</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_012.gene</td>
+                    <td>8897..9355 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">atcgtccagt</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_013.gene</td>
+                    <td>9332..9743 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">gcaatcaagc</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_014.gene</td>
+                    <td>9733..10414 [1]</td>
+                    <td>Unannotated but valid RBS</td>
+                    <td><span style="font-family:monospace">tac GAG taga</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_015.gene</td>
+                    <td>10516..10845 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">tgctatcaac</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_016.gene</td>
+                    <td>10516..11162 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">tgctatcaac</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_017.gene</td>
+                    <td>11192..14076 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">gttttttcgt</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_018.gene</td>
+                    <td>14067..14432 [1]</td>
+                    <td>Unannotated but valid RBS</td>
+                    <td><span style="font-family:monospace">cattc AGGA a</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_019.gene</td>
+                    <td>14500..15294 [1]</td>
+                    <td>Unannotated but valid RBS</td>
+                    <td><span style="font-family:monospace">gttgc AGGT a</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_020.gene</td>
+                    <td>15279..16025 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">tgtcgcttca</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_021.gene</td>
+                    <td>16009..16621 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">taatcgttcg</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_022.gene</td>
+                    <td>16688..20217 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">ataaatagca</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_023.gene</td>
+                    <td>20251..20568 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">atgccctttt</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_024.gene</td>
+                    <td>20556..21257 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">cttaatagca</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_025.gene</td>
+                    <td>21279..21518 [-1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">aatcacacta</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_026.gene</td>
+                    <td>21755..21906 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">tgtatatcgt</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_027.gene</td>
+                    <td>22020..23098 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">attcatcgta</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_028.gene</td>
+                    <td>23129..23820 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">aaacaaataa</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_029.gene</td>
+                    <td>23854..24289 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">tcgcccataa</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_030.gene</td>
+                    <td>24351..26550 [-1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">cgtgctattt</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_031.gene</td>
+                    <td>26447..26600 [-1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">tgcccattgc</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_032.gene</td>
+                    <td>26638..27585 [-1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">cgcgtttttt</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_033.gene</td>
+                    <td>27635..28098 [-1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">gcaaaaagtg</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_034.gene</td>
+                    <td>28168..30199 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">gttacaacga</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_035.gene</td>
+                    <td>30181..30612 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">caaagacttc</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_036.gene</td>
+                    <td>30669..31393 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">atctcaccaa</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_037.gene</td>
+                    <td>31377..31641 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">aaaacaaacg</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_038.gene</td>
+                    <td>31695..31917 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">tcatagaaac</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_039.gene</td>
+                    <td>31881..32167 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">cttcatctct</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_040.gene</td>
+                    <td>32099..32399 [1]</td>
+                    <td>Unannotated but valid RBS</td>
+                    <td><span style="font-family:monospace"> GAG ttttacc</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_041.gene</td>
+                    <td>32467..33611 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">tttcattatc</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_042.gene</td>
+                    <td>33673..34166 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">ttttatagaa</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_043.gene</td>
+                    <td>34226..34415 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">accacatcga</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_044.gene</td>
+                    <td>34525..34749 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">gttcaaaaaa</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_045.gene</td>
+                    <td>34739..35237 [1]</td>
+                    <td>Unannotated but valid RBS</td>
+                    <td><span style="font-family:monospace"> AGG cgcttaa</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_046.gene</td>
+                    <td>35227..35638 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">acgcatattg</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_047.gene</td>
+                    <td>35761..36179 [-1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">caatcctcga</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_048.gene</td>
+                    <td>36173..37755 [-1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">tcacgccgtc</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_049.gene</td>
+                    <td>37741..38294 [-1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">gacggcacaa</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_050.gene</td>
+                    <td>38277..38706 [-1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">tcaagataac</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_051.gene</td>
+                    <td>38774..38994 [-1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">tttacattaa</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_052.gene</td>
+                    <td>38987..39222 [-1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">taccaaacaa</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_053.gene</td>
+                    <td>39289..39444 [-1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">ggcatatcaa</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_054.gene</td>
+                    <td>39429..39758 [-1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">tatcctgact</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_055.gene</td>
+                    <td>39766..39978 [-1]</td>
+                    <td>Unannotated but valid RBS</td>
+                    <td><span style="font-family:monospace">tagc GGA tcg</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_056.gene</td>
+                    <td>39959..40342 [-1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">acatcaacag</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_057.gene</td>
+                    <td>41020..41606 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">tgtatattga</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_058.gene</td>
+                    <td>41606..41945 [1]</td>
+                    <td>Unannotated but valid RBS</td>
+                    <td><span style="font-family:monospace">aaaatgt GGA </span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_059.gene</td>
+                    <td>42013..42254 [1]</td>
+                    <td>Unannotated but valid RBS</td>
+                    <td><span style="font-family:monospace">ac GAG atacc</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_060.gene</td>
+                    <td>42247..42487 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">tcagcacttt</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_061.gene</td>
+                    <td>42473..42594 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">ggcgacaaca</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_062.gene</td>
+                    <td>42580..42764 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">ttgctactgt</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_063.gene</td>
+                    <td>42824..43324 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">ccatcgacaa</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_064.gene</td>
+                    <td>43385..43881 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">acatcaacca</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_065.gene</td>
+                    <td>43870..44030 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">actcttgtcc</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_066.gene</td>
+                    <td>44098..44485 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">gcacgacaac</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_067.gene</td>
+                    <td>44527..44649 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">gttccccttt</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_068.gene</td>
+                    <td>44625..44852 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">gacattatct</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_069.gene</td>
+                    <td>44898..45356 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">cctacaccaa</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_070.gene</td>
+                    <td>45428..45970 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">ttaagcaacc</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_071.gene</td>
+                    <td>45941..46451 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">gcctgtgata</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_072.gene</td>
+                    <td>46444..47026 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">tcacaaaaag</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_073.gene</td>
+                    <td>47085..47308 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">ctcatcgaca</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_074.gene</td>
+                    <td>47293..47649 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">cccgatcgaa</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_075.gene</td>
+                    <td>47637..47879 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">aagtttttca</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_076.gene</td>
+                    <td>47864..48082 [1]</td>
+                    <td>Unannotated but valid RBS</td>
+                    <td><span style="font-family:monospace">cgt AGGT aac</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_077.gene</td>
+                    <td>48235..48408 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">gcggcaacaa</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_078.gene</td>
+                    <td>48377..48574 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">ctggcgttat</span></td>
+                </tr>
+                <tr>
+                    <td>gene</td>
+                    <td>CPT-T1_079.gene</td>
+                    <td>48564..48803 [1]</td>
+                    <td>No RBS annotated, None found</td>
+                    <td><span style="font-family:monospace">atattgaatt</span></td>
+                </tr>
+              </tbody>
+            </table>
+          </div>
+
+          <h3 id="weird_starts">Start Codon Usage</h3>
+          <p>This section covers genes with unusual start codons</p>
+          <div class="table-responsive">
+            <table class="table table-striped">
+              <thead>
+                <tr>
+                  <th>Start Codon</th>
+                  <th>Count</th>
+                </tr>
+              </thead>
+              <tbody>
+                  <tr><td>ATG</td><td>74</td></tr>
+                  <tr><td>GTG</td><td>3</td></tr>
+                  <tr><td>TTG</td><td>2</td></tr>
+              </tbody>
+            </table>
+          </div>
+
+          <div class="table-responsive">
+            <table class="table table-striped">
+              <thead>
+                <tr>
+                  <th>Feature Type</th>
+                  <th>ID</th>
+                  <th>Location</th>
+                  <th>Error</th>
+                </tr>
+              </thead>
+              <tbody>
+              </tbody>
+            </table>
+          </div>
+
+          <h3 id="excessive_gaps">Intergenic Gaps</h3>
+          <p>Phage genomes are under pressure to maintain high coding density. Large intergenic gaps may be a sign of incorrect gene starts or missing genes.</p>
+          <div class="table-responsive">
+            <table class="table table-striped">
+              <thead>
+                <tr>
+                  <th>Region</th>
+                  <th>Size</th>
+                  <th>Bounding Gene Transcription Direction</th>
+                  <th>Message</th>
+                </tr>
+              </thead>
+              <tbody>
+                <tr>
+                    <td>2184 .. 2238</td>
+                    <td>54</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>5876 .. 5937</td>
+                    <td>61</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>6705 .. 6795</td>
+                    <td>90</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>10414 .. 10527</td>
+                    <td>113</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>14432 .. 14511</td>
+                    <td>79</td>
+                    <td>→ →</td>
+                    <td>
+                        2 ORFs found in this region
+                    </td>
+                </tr>
+                <tr>
+                    <td>16621 .. 16698</td>
+                    <td>77</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>21506 .. 21765</td>
+                    <td>259</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>21906 .. 22033</td>
+                    <td>127</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>24289 .. 24350</td>
+                    <td>61</td>
+                    <td>→ →</td>
+                    <td>
+                        1 ORF found in this region
+                    </td>
+                </tr>
+                <tr>
+                    <td>27573 .. 27634</td>
+                    <td>61</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>28087 .. 28180</td>
+                    <td>93</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>30612 .. 30679</td>
+                    <td>67</td>
+                    <td>→ →</td>
+                    <td>
+                        1 ORF found in this region
+                    </td>
+                </tr>
+                <tr>
+                    <td>31641 .. 31707</td>
+                    <td>66</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>32399 .. 32477</td>
+                    <td>78</td>
+                    <td>→ →</td>
+                    <td>
+                        1 ORF found in this region
+                    </td>
+                </tr>
+                <tr>
+                    <td>33611 .. 33683</td>
+                    <td>72</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>34166 .. 34238</td>
+                    <td>72</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>34415 .. 34533</td>
+                    <td>118</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>35638 .. 35760</td>
+                    <td>122</td>
+                    <td>→ →</td>
+                    <td>
+                        2 ORFs found in this region
+                    </td>
+                </tr>
+                <tr>
+                    <td>38693 .. 38773</td>
+                    <td>80</td>
+                    <td>→ →</td>
+                    <td>
+                        2 ORFs found in this region
+                    </td>
+                </tr>
+                <tr>
+                    <td>39211 .. 39288</td>
+                    <td>77</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>40330 .. 41030</td>
+                    <td>700</td>
+                    <td>→ →</td>
+                    <td>
+                        7 ORFs found in this region
+                    </td>
+                </tr>
+                <tr>
+                    <td>41945 .. 42023</td>
+                    <td>78</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>42764 .. 42835</td>
+                    <td>71</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>43324 .. 43395</td>
+                    <td>71</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>44030 .. 44110</td>
+                    <td>80</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>44485 .. 44538</td>
+                    <td>53</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>44852 .. 44909</td>
+                    <td>57</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>45356 .. 45439</td>
+                    <td>83</td>
+                    <td>→ →</td>
+                    <td>
+                        6 ORFs found in this region
+                    </td>
+                </tr>
+                <tr>
+                    <td>47026 .. 47098</td>
+                    <td>72</td>
+                    <td>→ →</td>
+                    <td>
+                    </td>
+                </tr>
+                <tr>
+                    <td>48082 .. 48246</td>
+                    <td>164</td>
+                    <td>→ →</td>
+                    <td>
+                        2 ORFs found in this region
+                    </td>
+                </tr>
+              </tbody>
+            </table>
+          </div>
+
+          <h3 id="excessive_overlap">Overlapping Genes </h3>
+          <p>Large gene overlaps may indicate an incorrect gene start or miscalled gene.</p>
+          <div class="table-responsive">
+            <table class="table table-striped">
+              <thead>
+                <tr>
+                  <th>Feature A</th>
+                  <th>Feature B</th>
+                  <th>Shared Region</th>
+                  <th>Overlap Length</th>
+                </tr>
+              </thead>
+              <tbody>
+                <tr>
+                    <td>CPT-T1_015.gene ([10516:10845](+))</td>
+                    <td>CPT-T1_016.gene ([10516:11162](+))</td>
+                    <td>10527..10844</td>
+                    <td>317 bp</td>
+                </tr>
+                <tr>
+                    <td>CPT-T1_030.gene ([24351:26550](-))</td>
+                    <td>CPT-T1_031.gene ([26447:26600](-))</td>
+                    <td>26446..26536</td>
+                    <td>90 bp</td>
+                </tr>
+                <tr>
+                    <td>CPT-T1_039.gene ([31881:32167](+))</td>
+                    <td>CPT-T1_040.gene ([32099:32399](+))</td>
+                    <td>32111..32166</td>
+                    <td>55 bp</td>
+                </tr>
+              </tbody>
+            </table>
+          </div>
+          <!--<h3 id="coding_density">Coding Density Issues <small>99 / 100</small></h3>
+          <div class="table-responsive">
+            <p>
+            You have a coding density of 93 which scores
+            99 / 100. Most genomes should be in the 90% to
+            100% coding density range
+            </p>
+          </div>-->
+
+          <h2 class="sub-header" id="antisense">Antisense Genes</h2>
+          <h3 id="morons">Possible Morons <small>78 / 79 (Doesn't count towards score)</small></h3>
+          <div class="table-responsive">
+            <table class="table table-striped">
+              <thead>
+                <tr>
+                  <th>Feature</th>
+                  <th>RBS</th>
+                  <th>Surrounding Features</th>
+                </tr>
+              </thead>
+              <tbody>
+                <tr>
+                    <td>CPT-T1_025.gene</td>
+                    <td>No RBS Available</td>
+                    <td class="moron">
+                            →
+                            →
+                        <span class="strand_emph">←</span>
+                            →
+                            →
+                    </td>
+                </tr>
+              </tbody>
+            </table>
+          </div>
+
+
+          <h2 class="sub-header" id="annotations">Annotation Issues</h2>
+          <h3 id="missing_product">Missing Product Tags <small>79 / 79</small></h3>
+          <div class="table-responsive">
+            <table class="table table-striped">
+              <thead>
+                <tr>
+                  <th>Feature</th>
+                  <th>Qualifiers</th>
+                </tr>
+              </thead>
+              <tbody>
+              </tbody>
+            </table>
+          </div>
+
+
+
+
+
+        </div>
+      </div>
+    </div>
+
+
+    <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.2/jquery.min.js"></script>
+  </body>
+</html>