def record_end(self, content):
    """Finish the current GenBank record: settle its id and attach its sequence.

    Written to be installed as ``Bio.GenBank._FeatureConsumer.record_end``.
    Unlike the upstream implementation it performs no alphabet / molecule-type
    detection, so it never raises ``ValueError`` for an unrecognised sequence
    type and it does not set ``annotations["topology"]``.

    Args:
        self: the feature consumer; ``self.data`` is the record being built,
            ``self._seq_data`` the collected sequence chunks and
            ``self._expected_size`` the length announced for the record
            (may be ``None``).
        content: unused; accepted only to keep the consumer callback signature.

    Raises:
        AssertionError: if the record has no id although an ``accessions``
            annotation is present.
    """
    from Bio.Seq import Seq

    record = self.data

    # Try to append the version number to the accession to form the full id.
    if not record.id:
        assert "accessions" not in record.annotations, record.annotations[
            "accessions"
        ]
        record.id = record.name  # fall back to the record name
    elif "." not in record.id:
        try:
            record.id += ".%i" % record.annotations["sequence_version"]
        except KeyError:
            # No version annotation available: keep the bare accession.
            pass

    sequence = "".join(self._seq_data)
    expected_size = self._expected_size

    # Only complain about a length mismatch when sequence data is present;
    # an empty sequence is handled separately below.
    if expected_size is not None and sequence and expected_size != len(sequence):
        import warnings

        from Bio import BiopythonParserWarning

        warnings.warn(
            "Expected sequence length %i, found %i (%s)."
            % (expected_size, len(sequence), record.id),
            BiopythonParserWarning,
        )

    # NOTE: the original read ``self.__expected_size`` here. Inside a
    # module-level function that name is not mangled, so it looked up a
    # non-existent attribute and raised AttributeError for every record that
    # carried no sequence data.
    if not sequence and expected_size:
        try:
            from Bio.Seq import UnknownSeq
        except ImportError:
            # Newer Biopython releases no longer ship UnknownSeq; a Seq of
            # known length but undefined content is the replacement.
            record.seq = Seq(None, length=expected_size)
        else:
            record.seq = UnknownSeq(expected_size)
    else:
        record.seq = Seq(sequence)
Ross}, + title = {CPT Galaxy Tools}, + year = {2020-}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + @unpublished{galaxyTools, + author = {A. Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {A. Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + 10.1371/journal.pcbi.1008214 + + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + + + + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + + + + diff -r 66143811fe8a -r 1bdd481d5c25 cpt_gbk_to_5col/BIO_FIX_TOPO.py --- a/cpt_gbk_to_5col/BIO_FIX_TOPO.py Fri Jun 17 12:45:08 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,85 +0,0 @@ -import Bio.GenBank - - -def record_end(self, content): - """Clean up when we've finished the record. - """ - #from Bio import Alphabet - #from Bio.Alphabet import IUPAC - from Bio.Seq import Seq, UnknownSeq - - # Try and append the version number to the accession for the full id - if not self.data.id: - assert "accessions" not in self.data.annotations, self.data.annotations[ - "accessions" - ] - self.data.id = self.data.name # Good fall back? 
- elif self.data.id.count(".") == 0: - try: - self.data.id += ".%i" % self.data.annotations["sequence_version"] - except KeyError: - pass - - # add the sequence information - # first, determine the alphabet - # we default to an generic alphabet if we don't have a - # seq type or have strange sequence information. - - #seq_alphabet = Alphabet.generic_alphabet - - # now set the sequence - sequence = "".join(self._seq_data) - - if ( - self._expected_size is not None - and len(sequence) != 0 - and self._expected_size != len(sequence) - ): - import warnings - from Bio import BiopythonParserWarning - - warnings.warn( - "Expected sequence length %i, found %i (%s)." - % (self._expected_size, len(sequence), self.data.id), - BiopythonParserWarning, - ) - """ - if self._seq_type: - # mRNA is really also DNA, since it is actually cDNA - if "DNA" in self._seq_type.upper() or "MRNA" in self._seq_type.upper(): - seq_alphabet = IUPAC.ambiguous_dna - # are there ever really RNA sequences in GenBank? - elif "RNA" in self._seq_type.upper(): - # Even for data which was from RNA, the sequence string - # is usually given as DNA (T not U). Bug 2408 - if "T" in sequence and "U" not in sequence: - seq_alphabet = IUPAC.ambiguous_dna - else: - seq_alphabet = IUPAC.ambiguous_rna - elif ( - "PROTEIN" in self._seq_type.upper() or self._seq_type == "PRT" - ): # PRT is used in EMBL-bank for patents - seq_alphabet = IUPAC.protein # or extended protein? 
- # work around ugly GenBank records which have circular or - # linear but no indication of sequence type - elif self._seq_type in ["circular", "linear", "unspecified"]: - pass - # we have a bug if we get here - else: - raise ValueError( - "Could not determine alphabet for seq_type %s" % self._seq_type - ) - - # Also save the chomosome layout - if "circular" in self._seq_type.lower(): - self.data.annotations["topology"] = "circular" - elif "linear" in self._seq_type.lower(): - self.data.annotations["topology"] = "linear" - """ - if not sequence and self.__expected_size: - self.data.seq = UnknownSeq(self._expected_size)#, seq_alphabet) - else: - self.data.seq = Seq(sequence)#, seq_alphabet) - - -Bio.GenBank._FeatureConsumer.record_end = record_end diff -r 66143811fe8a -r 1bdd481d5c25 cpt_gbk_to_5col/cpt-macros.xml --- a/cpt_gbk_to_5col/cpt-macros.xml Fri Jun 17 12:45:08 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,115 +0,0 @@ - - - - - python - biopython - requests - - - - - - - - 10.1371/journal.pcbi.1008214 - @unpublished{galaxyTools, - author = {E. Mijalis, H. Rasche}, - title = {CPT Galaxy Tools}, - year = {2013-2017}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - - 10.1371/journal.pcbi.1008214 - - @unpublished{galaxyTools, - author = {E. Mijalis, H. Rasche}, - title = {CPT Galaxy Tools}, - year = {2013-2017}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - - - - 10.1371/journal.pcbi.1008214 - - @unpublished{galaxyTools, - author = {C. Ross}, - title = {CPT Galaxy Tools}, - year = {2020-}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - - - - 10.1371/journal.pcbi.1008214 - - @unpublished{galaxyTools, - author = {E. Mijalis, H. Rasche}, - title = {CPT Galaxy Tools}, - year = {2013-2017}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - @unpublished{galaxyTools, - author = {A. 
Criscione}, - title = {CPT Galaxy Tools}, - year = {2019-2021}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - - - - 10.1371/journal.pcbi.1008214 - - @unpublished{galaxyTools, - author = {A. Criscione}, - title = {CPT Galaxy Tools}, - year = {2019-2021}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - - - - 10.1371/journal.pcbi.1008214 - - @unpublished{galaxyTools, - author = {C. Maughmer}, - title = {CPT Galaxy Tools}, - year = {2017-2020}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - - - - @unpublished{galaxyTools, - author = {C. Maughmer}, - title = {CPT Galaxy Tools}, - year = {2017-2020}, - note = {https://github.com/tamu-cpt/galaxy-tools/} - } - - - - diff -r 66143811fe8a -r 1bdd481d5c25 cpt_gbk_to_5col/gbk_to_five_col.py --- a/cpt_gbk_to_5col/gbk_to_five_col.py Fri Jun 17 12:45:08 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,61 +0,0 @@ -#!/usr/bin/env python -import BIO_FIX_TOPO # NOQA -import argparse -import logging -from Bio import SeqIO - -logging.basicConfig(level=logging.INFO) -log = logging.getLogger() - - -# Read in Genbank file and parse features -# Output features into Five Column format - -""" ->Feature SeqID -Line 1 - Column 1: Start location (first nucleotide) of a feature - Column 2: Stop location (last nucleotide) of a feature - Column 3: Feature name (for example, 'CDS' or 'mRNA' or 'rRNA' or 'gene' or 'exon') -Line2: - Column 4: Qualifier name (for example, 'product' or 'number' or 'gene' or 'note') - Column 5: Qualifier value - -Repeat for each feature in a seq -Repeat Line 2 for each qualifier in a feature -""" - - -def gbk_to_5col(genbank): - """Converts genbank to BankIt five column format""" - for record in SeqIO.parse(genbank, "genbank"): - print(">Feature %s" % record.id) - for feature in record.features: - if feature.type == "source": - continue - else: - for index, part in enumerate(feature.location.parts): - if part.strand > 0: - start = int(part.start) + 1 - 
end = int(part.end) - else: - start = int(part.end) - end = int(part.start) + 1 - if index == 0: - name = feature.type - print("%d\t%d\t%s" % (start, end, name)) - else: - print("%d\t%d" % (start, end)) - for (qualifier, values) in feature.qualifiers.items(): - for value in values: - print("\t\t\t%s\t%s" % (qualifier, value)) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Convert a Genbank file into five column format" - ) - parser.add_argument("genbank", type=argparse.FileType("r"), help="Genbank file") - - args = vars(parser.parse_args()) - gbk_to_5col(**args) diff -r 66143811fe8a -r 1bdd481d5c25 cpt_gbk_to_5col/gbk_to_five_col.xml --- a/cpt_gbk_to_5col/gbk_to_five_col.xml Fri Jun 17 12:45:08 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ - - - - - macros.xml - cpt-macros.xml - - - "$output" - -]]> - - - - - - - - - - - - - - -Genbank Format to Five Column Format -==================================== - -Output format is: - ->Feature ID -Line 1 -- Column 1: Start location (first nucleotide) of a feature -- Column 2: Stop location (last nucleotide) of a feature -- Column 3: Feature name (for example, 'CDS' or 'mRNA' or 'rRNA' or 'gene' or 'exon') - -Line2: -- Column 4: Qualifier name (for example, 'product' or 'number' or 'gene' or 'note') -- Column 5: Qualifier value - -Example Output:: - - >Feature contig00077 - 0 22956 source - mol_type genomic DNA - organism AU1189 - 11652 11326 CDS - 11327 11158 - note tapemeasure frameshift chaperone - product P2 E' tapemeasure frameshift chaperone - gene gp14 - translation MNPIQSDAAAPDLQADAAAIATPAQDDPATHTLDTPLVRGTQTITSITLRKPKSGELRGVSLSDLVSLDVVALSKVLPRISSPMLTEADVASIDPADLVQLGGIFAGFFDAEGREIPTGLPDRVEDPMADIATVFGWTPPVMDAFSLAELMDWRERARVRAGAQ - 11900 11599 CDS - 11600 11408 - 11910 11904 RBS - - - - diff -r 66143811fe8a -r 1bdd481d5c25 cpt_gbk_to_5col/macros.xml --- a/cpt_gbk_to_5col/macros.xml Fri Jun 17 12:45:08 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ 
-1,105 +0,0 @@ - - - - - python - biopython - cpt_gffparser - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -r 66143811fe8a -r 1bdd481d5c25 cpt_gbk_to_5col/test-data/complex_feature_locs.gbk --- a/cpt_gbk_to_5col/test-data/complex_feature_locs.gbk Fri Jun 17 12:45:08 2022 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,38 +0,0 @@ -LOCUS contig00077 300 bp DNA linear 15-MAR-2010 -DEFINITION '[length=22956]' '[numreads=4517 from AU1189;454 Data]'. -ACCESSION -VERSION -KEYWORDS . -SOURCE AU1189 - ORGANISM AU1189 - Unclassified. -REFERENCE 1 (bases 1 to 22956) - AUTHORS Duarte,I. - TITLE contig77 - JOURNAL Unpublished -REFERENCE 2 (bases 1 to 22956) - AUTHORS Duarte,I. - TITLE Direct Submission - JOURNAL Submitted (15-MAR-2010) PLPM, Texas A&M University, 2132 TAMU, - College Station, TX 77840, USA -FEATURES Location/Qualifiers - source 1..22956 - /organism="AU1189" - /mol_type="genomic DNA" - CDS complement(join(11159..11327,11327..11652)) - /note="tapemeasure frameshift chaperone" - /product="P2 E' tapemeasure frameshift chaperone" - /translation="MNPIQSDAAAPDLQADAAAIATPAQDDPATHTLDTPLVRGTQTITSITLRKPKSGELRGV - SLSDLVSLDVVALSKVLPRISSPMLTEADVASIDPADLVQLGGIFAGFFDAEGREIPTGL - PDRVEDPMADIATVFGWTPPVMDAFSLAELMDWRERARVRAGAQ" - /gene="gp14" - CDS complement(join(11409..11600,11600..11900)) - RBS complement(11905..11910) -BASE COUNT 3240 a 7606 c 8254 g 3856 t -ORIGIN - 1 agccgggcgc gccaagcctg atcaggctct cagcggtttc ctcccatcgt cgtgcagtac - 61 cgttgcagct aaattgcagc cggaatcggc gcgggctcgg ccgtcagcgg cgcgacccat - 121 tgcgccagat gcgcggccga cagatgcgcg taccgctgca ccatttccat cgtctcccag - 181 ccgcccagct ccttcagcac ctgcagcggc gtgccgcgtt ggacgtgcca gctcgcccag - 241 gtgtggcgca ggtcgtgcca gcggaaatcg tgcaggccgg cgcgccgcag cgccttggcc -// diff -r 66143811fe8a -r 1bdd481d5c25 cpt_gbk_to_5col/test-data/gbkto5col.tsv --- 
def gbk_to_5col(genbank):
    """Print the features of a GenBank file in five-column table format.

    For every record a ``>Feature <record id>`` header is printed. Each
    feature other than ``source`` then produces:

    * one ``start<TAB>stop<TAB>feature type`` line for its first location part,
    * one ``start<TAB>stop`` line for each further location part,
    * one ``<TAB><TAB><TAB>qualifier<TAB>value`` line per qualifier value.

    Args:
        genbank: file handle or path passed straight to ``SeqIO.parse``.

    Returns:
        None. All output is written to standard output.
    """
    for record in SeqIO.parse(genbank, "genbank"):
        print(">Feature %s" % record.id)
        for feature in record.features:
            if feature.type == "source":
                continue

            for index, part in enumerate(feature.location.parts):
                # The lower bound is shifted by one so that both printed
                # coordinates name an actual nucleotide of the part.
                low = int(part.start) + 1
                high = int(part.end)

                # A part without strand information (strand is None) is
                # reported in forward orientation; comparing None with 0
                # would raise TypeError. Every other strand value keeps its
                # previous handling: > 0 is forward, anything else reverse.
                if part.strand is None or part.strand > 0:
                    start, end = low, high
                else:
                    start, end = high, low

                if index == 0:
                    # Only the first interval carries the feature name.
                    print("%d\t%d\t%s" % (start, end, feature.type))
                else:
                    print("%d\t%d" % (start, end))

            for qualifier, values in feature.qualifiers.items():
                for value in values:
                    print("\t\t\t%s\t%s" % (qualifier, value))
str($reference_genome.reference_genome_source) == 'cached': + '${reference_genome.fasta_indexes.fields.path}' + #else if str($reference_genome.reference_genome_source) == 'history': + genomeref.fa + #end if + + + #if $reference_genome.reference_genome_source == 'history': + ln -s '$reference_genome.genome_fasta' genomeref.fa; + #end if + + + #if str($reference_genome.reference_genome_source) == 'cached': + '${reference_genome.fasta_indexes.fields.path}' + #else if str($reference_genome.reference_genome_source) == 'history': + genomeref.fa + #end if + + diff -r 66143811fe8a -r 1bdd481d5c25 test-data/complex_feature_locs.gbk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/complex_feature_locs.gbk Mon Jun 05 02:42:57 2023 +0000 @@ -0,0 +1,38 @@ +LOCUS contig00077 300 bp DNA linear 15-MAR-2010 +DEFINITION '[length=22956]' '[numreads=4517 from AU1189;454 Data]'. +ACCESSION +VERSION +KEYWORDS . +SOURCE AU1189 + ORGANISM AU1189 + Unclassified. +REFERENCE 1 (bases 1 to 22956) + AUTHORS Duarte,I. + TITLE contig77 + JOURNAL Unpublished +REFERENCE 2 (bases 1 to 22956) + AUTHORS Duarte,I. 
+ TITLE Direct Submission + JOURNAL Submitted (15-MAR-2010) PLPM, Texas A&M University, 2132 TAMU, + College Station, TX 77840, USA +FEATURES Location/Qualifiers + source 1..22956 + /organism="AU1189" + /mol_type="genomic DNA" + CDS complement(join(11159..11327,11327..11652)) + /note="tapemeasure frameshift chaperone" + /product="P2 E' tapemeasure frameshift chaperone" + /translation="MNPIQSDAAAPDLQADAAAIATPAQDDPATHTLDTPLVRGTQTITSITLRKPKSGELRGV + SLSDLVSLDVVALSKVLPRISSPMLTEADVASIDPADLVQLGGIFAGFFDAEGREIPTGL + PDRVEDPMADIATVFGWTPPVMDAFSLAELMDWRERARVRAGAQ" + /gene="gp14" + CDS complement(join(11409..11600,11600..11900)) + RBS complement(11905..11910) +BASE COUNT 3240 a 7606 c 8254 g 3856 t +ORIGIN + 1 agccgggcgc gccaagcctg atcaggctct cagcggtttc ctcccatcgt cgtgcagtac + 61 cgttgcagct aaattgcagc cggaatcggc gcgggctcgg ccgtcagcgg cgcgacccat + 121 tgcgccagat gcgcggccga cagatgcgcg taccgctgca ccatttccat cgtctcccag + 181 ccgcccagct ccttcagcac ctgcagcggc gtgccgcgtt ggacgtgcca gctcgcccag + 241 gtgtggcgca ggtcgtgcca gcggaaatcg tgcaggccgg cgcgccgcag cgccttggcc +// diff -r 66143811fe8a -r 1bdd481d5c25 test-data/gbkto5col.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gbkto5col.tsv Mon Jun 05 02:42:57 2023 +0000 @@ -0,0 +1,10 @@ +>Feature contig00077 +11652 11327 CDS +11327 11159 + note tapemeasure frameshift chaperone + product P2 E' tapemeasure frameshift chaperone + translation MNPIQSDAAAPDLQADAAAIATPAQDDPATHTLDTPLVRGTQTITSITLRKPKSGELRGVSLSDLVSLDVVALSKVLPRISSPMLTEADVASIDPADLVQLGGIFAGFFDAEGREIPTGLPDRVEDPMADIATVFGWTPPVMDAFSLAELMDWRERARVRAGAQ + gene gp14 +11900 11600 CDS +11600 11409 +11910 11905 RBS