# HG changeset patch
# User cpt
# Date 1685932977 0
# Node ID 1bdd481d5c25fddfe6fb104c135f04c995541c71
# Parent 66143811fe8aad6d5ae9b6e7f9d8757fde277aa5
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
diff -r 66143811fe8a -r 1bdd481d5c25 BIO_FIX_TOPO.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/BIO_FIX_TOPO.py Mon Jun 05 02:42:57 2023 +0000
@@ -0,0 +1,84 @@
+import Bio.GenBank
+
+
+def record_end(self, content):
+ """Clean up when we've finished the record."""
+ # from Bio import Alphabet
+ # from Bio.Alphabet import IUPAC
+ from Bio.Seq import Seq, UnknownSeq
+
+ # Try and append the version number to the accession for the full id
+ if not self.data.id:
+ assert "accessions" not in self.data.annotations, self.data.annotations[
+ "accessions"
+ ]
+ self.data.id = self.data.name # Good fall back?
+ elif self.data.id.count(".") == 0:
+ try:
+ self.data.id += ".%i" % self.data.annotations["sequence_version"]
+ except KeyError:
+ pass
+
+ # add the sequence information
+ # first, determine the alphabet
+ # we default to an generic alphabet if we don't have a
+ # seq type or have strange sequence information.
+
+ # seq_alphabet = Alphabet.generic_alphabet
+
+ # now set the sequence
+ sequence = "".join(self._seq_data)
+
+ if (
+ self._expected_size is not None
+ and len(sequence) != 0
+ and self._expected_size != len(sequence)
+ ):
+ import warnings
+ from Bio import BiopythonParserWarning
+
+ warnings.warn(
+ "Expected sequence length %i, found %i (%s)."
+ % (self._expected_size, len(sequence), self.data.id),
+ BiopythonParserWarning,
+ )
+ """
+ if self._seq_type:
+ # mRNA is really also DNA, since it is actually cDNA
+ if "DNA" in self._seq_type.upper() or "MRNA" in self._seq_type.upper():
+ seq_alphabet = IUPAC.ambiguous_dna
+ # are there ever really RNA sequences in GenBank?
+ elif "RNA" in self._seq_type.upper():
+ # Even for data which was from RNA, the sequence string
+ # is usually given as DNA (T not U). Bug 2408
+ if "T" in sequence and "U" not in sequence:
+ seq_alphabet = IUPAC.ambiguous_dna
+ else:
+ seq_alphabet = IUPAC.ambiguous_rna
+ elif (
+ "PROTEIN" in self._seq_type.upper() or self._seq_type == "PRT"
+ ): # PRT is used in EMBL-bank for patents
+ seq_alphabet = IUPAC.protein # or extended protein?
+ # work around ugly GenBank records which have circular or
+ # linear but no indication of sequence type
+ elif self._seq_type in ["circular", "linear", "unspecified"]:
+ pass
+ # we have a bug if we get here
+ else:
+ raise ValueError(
+ "Could not determine alphabet for seq_type %s" % self._seq_type
+ )
+
+ # Also save the chomosome layout
+ if "circular" in self._seq_type.lower():
+ self.data.annotations["topology"] = "circular"
+ elif "linear" in self._seq_type.lower():
+ self.data.annotations["topology"] = "linear"
+ """
+ if not sequence and self.__expected_size:
+ self.data.seq = UnknownSeq(self._expected_size) # , seq_alphabet)
+ else:
+ self.data.seq = Seq(sequence) # , seq_alphabet)
+
+
+Bio.GenBank._FeatureConsumer.record_end = record_end
diff -r 66143811fe8a -r 1bdd481d5c25 cpt-macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt-macros.xml Mon Jun 05 02:42:57 2023 +0000
@@ -0,0 +1,115 @@
+
+
+
+ python
+ biopython
+ requests
+ cpt_gffparser
+
+
+
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+
+ @unpublished{galaxyTools,
+ author = {C. Ross},
+ title = {CPT Galaxy Tools},
+ year = {2020-},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+
+
+ 10.1371/journal.pcbi.1008214
+
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
+
+
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+
+
+
+
diff -r 66143811fe8a -r 1bdd481d5c25 cpt_gbk_to_5col/BIO_FIX_TOPO.py
--- a/cpt_gbk_to_5col/BIO_FIX_TOPO.py Fri Jun 17 12:45:08 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,85 +0,0 @@
-import Bio.GenBank
-
-
-def record_end(self, content):
- """Clean up when we've finished the record.
- """
- #from Bio import Alphabet
- #from Bio.Alphabet import IUPAC
- from Bio.Seq import Seq, UnknownSeq
-
- # Try and append the version number to the accession for the full id
- if not self.data.id:
- assert "accessions" not in self.data.annotations, self.data.annotations[
- "accessions"
- ]
- self.data.id = self.data.name # Good fall back?
- elif self.data.id.count(".") == 0:
- try:
- self.data.id += ".%i" % self.data.annotations["sequence_version"]
- except KeyError:
- pass
-
- # add the sequence information
- # first, determine the alphabet
- # we default to an generic alphabet if we don't have a
- # seq type or have strange sequence information.
-
- #seq_alphabet = Alphabet.generic_alphabet
-
- # now set the sequence
- sequence = "".join(self._seq_data)
-
- if (
- self._expected_size is not None
- and len(sequence) != 0
- and self._expected_size != len(sequence)
- ):
- import warnings
- from Bio import BiopythonParserWarning
-
- warnings.warn(
- "Expected sequence length %i, found %i (%s)."
- % (self._expected_size, len(sequence), self.data.id),
- BiopythonParserWarning,
- )
- """
- if self._seq_type:
- # mRNA is really also DNA, since it is actually cDNA
- if "DNA" in self._seq_type.upper() or "MRNA" in self._seq_type.upper():
- seq_alphabet = IUPAC.ambiguous_dna
- # are there ever really RNA sequences in GenBank?
- elif "RNA" in self._seq_type.upper():
- # Even for data which was from RNA, the sequence string
- # is usually given as DNA (T not U). Bug 2408
- if "T" in sequence and "U" not in sequence:
- seq_alphabet = IUPAC.ambiguous_dna
- else:
- seq_alphabet = IUPAC.ambiguous_rna
- elif (
- "PROTEIN" in self._seq_type.upper() or self._seq_type == "PRT"
- ): # PRT is used in EMBL-bank for patents
- seq_alphabet = IUPAC.protein # or extended protein?
- # work around ugly GenBank records which have circular or
- # linear but no indication of sequence type
- elif self._seq_type in ["circular", "linear", "unspecified"]:
- pass
- # we have a bug if we get here
- else:
- raise ValueError(
- "Could not determine alphabet for seq_type %s" % self._seq_type
- )
-
- # Also save the chomosome layout
- if "circular" in self._seq_type.lower():
- self.data.annotations["topology"] = "circular"
- elif "linear" in self._seq_type.lower():
- self.data.annotations["topology"] = "linear"
- """
- if not sequence and self.__expected_size:
- self.data.seq = UnknownSeq(self._expected_size)#, seq_alphabet)
- else:
- self.data.seq = Seq(sequence)#, seq_alphabet)
-
-
-Bio.GenBank._FeatureConsumer.record_end = record_end
diff -r 66143811fe8a -r 1bdd481d5c25 cpt_gbk_to_5col/cpt-macros.xml
--- a/cpt_gbk_to_5col/cpt-macros.xml Fri Jun 17 12:45:08 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-
-
-
-
- python
- biopython
- requests
-
-
-
-
-
-
-
- 10.1371/journal.pcbi.1008214
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
- 10.1371/journal.pcbi.1008214
-
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
-
-
- 10.1371/journal.pcbi.1008214
-
- @unpublished{galaxyTools,
- author = {C. Ross},
- title = {CPT Galaxy Tools},
- year = {2020-},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
-
-
- 10.1371/journal.pcbi.1008214
-
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
- @unpublished{galaxyTools,
- author = {A. Criscione},
- title = {CPT Galaxy Tools},
- year = {2019-2021},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
-
-
- 10.1371/journal.pcbi.1008214
-
- @unpublished{galaxyTools,
- author = {A. Criscione},
- title = {CPT Galaxy Tools},
- year = {2019-2021},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
-
-
- 10.1371/journal.pcbi.1008214
-
- @unpublished{galaxyTools,
- author = {C. Maughmer},
- title = {CPT Galaxy Tools},
- year = {2017-2020},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
-
-
- @unpublished{galaxyTools,
- author = {C. Maughmer},
- title = {CPT Galaxy Tools},
- year = {2017-2020},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-
-
-
-
diff -r 66143811fe8a -r 1bdd481d5c25 cpt_gbk_to_5col/gbk_to_five_col.py
--- a/cpt_gbk_to_5col/gbk_to_five_col.py Fri Jun 17 12:45:08 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-#!/usr/bin/env python
-import BIO_FIX_TOPO # NOQA
-import argparse
-import logging
-from Bio import SeqIO
-
-logging.basicConfig(level=logging.INFO)
-log = logging.getLogger()
-
-
-# Read in Genbank file and parse features
-# Output features into Five Column format
-
-"""
->Feature SeqID
-Line 1
- Column 1: Start location (first nucleotide) of a feature
- Column 2: Stop location (last nucleotide) of a feature
- Column 3: Feature name (for example, 'CDS' or 'mRNA' or 'rRNA' or 'gene' or 'exon')
-Line2:
- Column 4: Qualifier name (for example, 'product' or 'number' or 'gene' or 'note')
- Column 5: Qualifier value
-
-Repeat for each feature in a seq
-Repeat Line 2 for each qualifier in a feature
-"""
-
-
-def gbk_to_5col(genbank):
- """Converts genbank to BankIt five column format"""
- for record in SeqIO.parse(genbank, "genbank"):
- print(">Feature %s" % record.id)
- for feature in record.features:
- if feature.type == "source":
- continue
- else:
- for index, part in enumerate(feature.location.parts):
- if part.strand > 0:
- start = int(part.start) + 1
- end = int(part.end)
- else:
- start = int(part.end)
- end = int(part.start) + 1
- if index == 0:
- name = feature.type
- print("%d\t%d\t%s" % (start, end, name))
- else:
- print("%d\t%d" % (start, end))
- for (qualifier, values) in feature.qualifiers.items():
- for value in values:
- print("\t\t\t%s\t%s" % (qualifier, value))
-
-
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(
- description="Convert a Genbank file into five column format"
- )
- parser.add_argument("genbank", type=argparse.FileType("r"), help="Genbank file")
-
- args = vars(parser.parse_args())
- gbk_to_5col(**args)
diff -r 66143811fe8a -r 1bdd481d5c25 cpt_gbk_to_5col/gbk_to_five_col.xml
--- a/cpt_gbk_to_5col/gbk_to_five_col.xml Fri Jun 17 12:45:08 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-
-
-
-
- macros.xml
- cpt-macros.xml
-
-
- "$output"
-
-]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-Genbank Format to Five Column Format
-====================================
-
-Output format is:
-
->Feature ID
-Line 1
-- Column 1: Start location (first nucleotide) of a feature
-- Column 2: Stop location (last nucleotide) of a feature
-- Column 3: Feature name (for example, 'CDS' or 'mRNA' or 'rRNA' or 'gene' or 'exon')
-
-Line2:
-- Column 4: Qualifier name (for example, 'product' or 'number' or 'gene' or 'note')
-- Column 5: Qualifier value
-
-Example Output::
-
- >Feature contig00077
- 0 22956 source
- mol_type genomic DNA
- organism AU1189
- 11652 11326 CDS
- 11327 11158
- note tapemeasure frameshift chaperone
- product P2 E' tapemeasure frameshift chaperone
- gene gp14
- translation MNPIQSDAAAPDLQADAAAIATPAQDDPATHTLDTPLVRGTQTITSITLRKPKSGELRGVSLSDLVSLDVVALSKVLPRISSPMLTEADVASIDPADLVQLGGIFAGFFDAEGREIPTGLPDRVEDPMADIATVFGWTPPVMDAFSLAELMDWRERARVRAGAQ
- 11900 11599 CDS
- 11600 11408
- 11910 11904 RBS
-
-
-
-
diff -r 66143811fe8a -r 1bdd481d5c25 cpt_gbk_to_5col/macros.xml
--- a/cpt_gbk_to_5col/macros.xml Fri Jun 17 12:45:08 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,105 +0,0 @@
-
-
-
-
- python
- biopython
- cpt_gffparser
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff -r 66143811fe8a -r 1bdd481d5c25 cpt_gbk_to_5col/test-data/complex_feature_locs.gbk
--- a/cpt_gbk_to_5col/test-data/complex_feature_locs.gbk Fri Jun 17 12:45:08 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,38 +0,0 @@
-LOCUS contig00077 300 bp DNA linear 15-MAR-2010
-DEFINITION '[length=22956]' '[numreads=4517 from AU1189;454 Data]'.
-ACCESSION
-VERSION
-KEYWORDS .
-SOURCE AU1189
- ORGANISM AU1189
- Unclassified.
-REFERENCE 1 (bases 1 to 22956)
- AUTHORS Duarte,I.
- TITLE contig77
- JOURNAL Unpublished
-REFERENCE 2 (bases 1 to 22956)
- AUTHORS Duarte,I.
- TITLE Direct Submission
- JOURNAL Submitted (15-MAR-2010) PLPM, Texas A&M University, 2132 TAMU,
- College Station, TX 77840, USA
-FEATURES Location/Qualifiers
- source 1..22956
- /organism="AU1189"
- /mol_type="genomic DNA"
- CDS complement(join(11159..11327,11327..11652))
- /note="tapemeasure frameshift chaperone"
- /product="P2 E' tapemeasure frameshift chaperone"
- /translation="MNPIQSDAAAPDLQADAAAIATPAQDDPATHTLDTPLVRGTQTITSITLRKPKSGELRGV
- SLSDLVSLDVVALSKVLPRISSPMLTEADVASIDPADLVQLGGIFAGFFDAEGREIPTGL
- PDRVEDPMADIATVFGWTPPVMDAFSLAELMDWRERARVRAGAQ"
- /gene="gp14"
- CDS complement(join(11409..11600,11600..11900))
- RBS complement(11905..11910)
-BASE COUNT 3240 a 7606 c 8254 g 3856 t
-ORIGIN
- 1 agccgggcgc gccaagcctg atcaggctct cagcggtttc ctcccatcgt cgtgcagtac
- 61 cgttgcagct aaattgcagc cggaatcggc gcgggctcgg ccgtcagcgg cgcgacccat
- 121 tgcgccagat gcgcggccga cagatgcgcg taccgctgca ccatttccat cgtctcccag
- 181 ccgcccagct ccttcagcac ctgcagcggc gtgccgcgtt ggacgtgcca gctcgcccag
- 241 gtgtggcgca ggtcgtgcca gcggaaatcg tgcaggccgg cgcgccgcag cgccttggcc
-//
diff -r 66143811fe8a -r 1bdd481d5c25 cpt_gbk_to_5col/test-data/gbkto5col.tsv
--- a/cpt_gbk_to_5col/test-data/gbkto5col.tsv Fri Jun 17 12:45:08 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
->Feature contig00077
-11652 11327 CDS
-11327 11159
- note tapemeasure frameshift chaperone
- product P2 E' tapemeasure frameshift chaperone
- translation MNPIQSDAAAPDLQADAAAIATPAQDDPATHTLDTPLVRGTQTITSITLRKPKSGELRGVSLSDLVSLDVVALSKVLPRISSPMLTEADVASIDPADLVQLGGIFAGFFDAEGREIPTGLPDRVEDPMADIATVFGWTPPVMDAFSLAELMDWRERARVRAGAQ
- gene gp14
-11900 11600 CDS
-11600 11409
-11910 11905 RBS
diff -r 66143811fe8a -r 1bdd481d5c25 gbk_to_five_col.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gbk_to_five_col.py Mon Jun 05 02:42:57 2023 +0000
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+import BIO_FIX_TOPO # NOQA
+import argparse
+import logging
+from Bio import SeqIO
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger()
+
+
+# Read in Genbank file and parse features
+# Output features into Five Column format
+
+"""
+>Feature SeqID
+Line 1
+ Column 1: Start location (first nucleotide) of a feature
+ Column 2: Stop location (last nucleotide) of a feature
+ Column 3: Feature name (for example, 'CDS' or 'mRNA' or 'rRNA' or 'gene' or 'exon')
+Line2:
+ Column 4: Qualifier name (for example, 'product' or 'number' or 'gene' or 'note')
+ Column 5: Qualifier value
+
+Repeat for each feature in a seq
+Repeat Line 2 for each qualifier in a feature
+"""
+
+
+def gbk_to_5col(genbank):
+ """Converts genbank to BankIt five column format"""
+ for record in SeqIO.parse(genbank, "genbank"):
+ print(">Feature %s" % record.id)
+ for feature in record.features:
+ if feature.type == "source":
+ continue
+ else:
+ for index, part in enumerate(feature.location.parts):
+ if part.strand > 0:
+ start = int(part.start) + 1
+ end = int(part.end)
+ else:
+ start = int(part.end)
+ end = int(part.start) + 1
+ if index == 0:
+ name = feature.type
+ print("%d\t%d\t%s" % (start, end, name))
+ else:
+ print("%d\t%d" % (start, end))
+ for (qualifier, values) in feature.qualifiers.items():
+ for value in values:
+ print("\t\t\t%s\t%s" % (qualifier, value))
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description="Convert a Genbank file into five column format"
+ )
+ parser.add_argument("genbank", type=argparse.FileType("r"), help="Genbank file")
+
+ args = vars(parser.parse_args())
+ gbk_to_5col(**args)
diff -r 66143811fe8a -r 1bdd481d5c25 gbk_to_five_col.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gbk_to_five_col.xml Mon Jun 05 02:42:57 2023 +0000
@@ -0,0 +1,62 @@
+
+
+
+ macros.xml
+ cpt-macros.xml
+
+
+ "$output"
+
+]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Genbank Format to Five Column Format
+====================================
+
+Output format is:
+
+>Feature ID
+Line 1
+- Column 1: Start location (first nucleotide) of a feature
+- Column 2: Stop location (last nucleotide) of a feature
+- Column 3: Feature name (for example, 'CDS' or 'mRNA' or 'rRNA' or 'gene' or 'exon')
+
+Line2:
+- Column 4: Qualifier name (for example, 'product' or 'number' or 'gene' or 'note')
+- Column 5: Qualifier value
+
+Example Output::
+
+ >Feature contig00077
+ 0 22956 source
+ mol_type genomic DNA
+ organism AU1189
+ 11652 11326 CDS
+ 11327 11158
+ note tapemeasure frameshift chaperone
+ product P2 E' tapemeasure frameshift chaperone
+ gene gp14
+ translation MNPIQSDAAAPDLQADAAAIATPAQDDPATHTLDTPLVRGTQTITSITLRKPKSGELRGVSLSDLVSLDVVALSKVLPRISSPMLTEADVASIDPADLVQLGGIFAGFFDAEGREIPTGLPDRVEDPMADIATVFGWTPPVMDAFSLAELMDWRERARVRAGAQ
+ 11900 11599 CDS
+ 11600 11408
+ 11910 11904 RBS
+
+
+
+
diff -r 66143811fe8a -r 1bdd481d5c25 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Jun 05 02:42:57 2023 +0000
@@ -0,0 +1,74 @@
+
+
+
+ progressivemauve
+
+ bcbiogff
+
+
+
+ 2.4.0
+
+ 10.1371/journal.pone.0011147
+
+
+ 10.1093/bioinformatics/btm039
+
+
+ '$xmfa'
+
+
+
+
+
+ '$sequences'
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ '$gff3_data'
+
+
+ #if str($reference_genome.reference_genome_source) == 'cached':
+ '${reference_genome.fasta_indexes.fields.path}'
+ #else if str($reference_genome.reference_genome_source) == 'history':
+ genomeref.fa
+ #end if
+
+
+ #if $reference_genome.reference_genome_source == 'history':
+ ln -s '$reference_genome.genome_fasta' genomeref.fa;
+ #end if
+
+
+ #if str($reference_genome.reference_genome_source) == 'cached':
+ '${reference_genome.fasta_indexes.fields.path}'
+ #else if str($reference_genome.reference_genome_source) == 'history':
+ genomeref.fa
+ #end if
+
+
diff -r 66143811fe8a -r 1bdd481d5c25 test-data/complex_feature_locs.gbk
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/complex_feature_locs.gbk Mon Jun 05 02:42:57 2023 +0000
@@ -0,0 +1,38 @@
+LOCUS contig00077 300 bp DNA linear 15-MAR-2010
+DEFINITION '[length=22956]' '[numreads=4517 from AU1189;454 Data]'.
+ACCESSION
+VERSION
+KEYWORDS .
+SOURCE AU1189
+ ORGANISM AU1189
+ Unclassified.
+REFERENCE 1 (bases 1 to 22956)
+ AUTHORS Duarte,I.
+ TITLE contig77
+ JOURNAL Unpublished
+REFERENCE 2 (bases 1 to 22956)
+ AUTHORS Duarte,I.
+ TITLE Direct Submission
+ JOURNAL Submitted (15-MAR-2010) PLPM, Texas A&M University, 2132 TAMU,
+ College Station, TX 77840, USA
+FEATURES Location/Qualifiers
+ source 1..22956
+ /organism="AU1189"
+ /mol_type="genomic DNA"
+ CDS complement(join(11159..11327,11327..11652))
+ /note="tapemeasure frameshift chaperone"
+ /product="P2 E' tapemeasure frameshift chaperone"
+ /translation="MNPIQSDAAAPDLQADAAAIATPAQDDPATHTLDTPLVRGTQTITSITLRKPKSGELRGV
+ SLSDLVSLDVVALSKVLPRISSPMLTEADVASIDPADLVQLGGIFAGFFDAEGREIPTGL
+ PDRVEDPMADIATVFGWTPPVMDAFSLAELMDWRERARVRAGAQ"
+ /gene="gp14"
+ CDS complement(join(11409..11600,11600..11900))
+ RBS complement(11905..11910)
+BASE COUNT 3240 a 7606 c 8254 g 3856 t
+ORIGIN
+ 1 agccgggcgc gccaagcctg atcaggctct cagcggtttc ctcccatcgt cgtgcagtac
+ 61 cgttgcagct aaattgcagc cggaatcggc gcgggctcgg ccgtcagcgg cgcgacccat
+ 121 tgcgccagat gcgcggccga cagatgcgcg taccgctgca ccatttccat cgtctcccag
+ 181 ccgcccagct ccttcagcac ctgcagcggc gtgccgcgtt ggacgtgcca gctcgcccag
+ 241 gtgtggcgca ggtcgtgcca gcggaaatcg tgcaggccgg cgcgccgcag cgccttggcc
+//
diff -r 66143811fe8a -r 1bdd481d5c25 test-data/gbkto5col.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gbkto5col.tsv Mon Jun 05 02:42:57 2023 +0000
@@ -0,0 +1,10 @@
+>Feature contig00077
+11652 11327 CDS
+11327 11159
+ note tapemeasure frameshift chaperone
+ product P2 E' tapemeasure frameshift chaperone
+ translation MNPIQSDAAAPDLQADAAAIATPAQDDPATHTLDTPLVRGTQTITSITLRKPKSGELRGVSLSDLVSLDVVALSKVLPRISSPMLTEADVASIDPADLVQLGGIFAGFFDAEGREIPTGLPDRVEDPMADIATVFGWTPPVMDAFSLAELMDWRERARVRAGAQ
+ gene gp14
+11900 11600 CDS
+11600 11409
+11910 11905 RBS