Mercurial > repos > menegidio > mitoprokka
changeset 0:8813044f72bf draft
Uploaded
author | menegidio |
---|---|
date | Wed, 26 May 2021 16:01:54 +0000 |
parents | |
children | 2c228021040a |
files | mito-prokka/.shed.yml mito-prokka/mito-prokka.xml mito-prokka/test-data/out.err mito-prokka/test-data/out.faa mito-prokka/test-data/out.ffn mito-prokka/test-data/out.fna mito-prokka/test-data/out.fsa mito-prokka/test-data/out.gbk mito-prokka/test-data/out.gff mito-prokka/test-data/out.sqn mito-prokka/test-data/out.tbl mito-prokka/test-data/out.tsv mito-prokka/test-data/out.txt mito-prokka/test-data/phiX174.fasta |
diffstat | 14 files changed, 1998 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mito-prokka/.shed.yml Wed May 26 16:01:54 2021 +0000
@@ -0,0 +1,15 @@
+# Tool Shed metadata for the mito-prokka Galaxy wrapper.
+name: mito-prokka
+owner: menegidio
+# Absolute URL (scheme included) so the value can be followed as a link.
+remote_repository_url: "https://github.com/menegidio-lab/mitogalaxy/tree/master/tools-shed/mito-prokka"
+homepage_url: "https://github.com/tseemann/prokka"
+type: unrestricted
+description: Mito-Prokka - Rapid annotation of bacterial, archaeal, viral and mitochondrial genomes
+long_description: |
+  Prokka is a software tool to rapidly annotate bacterial, archaeal and viral
+  genomes, and produce output files that require only minor tweaking to submit
+  to GenBank/ENA/DDBJ. Mito-Prokka is a version adapted for annotation of
+  mitochondria and chloroplasts.
+categories:
+- "Sequence Analysis"
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mito-prokka/mito-prokka.xml Wed May 26 16:01:54 2021 +0000
@@ -0,0 +1,267 @@
+<!--
+    Galaxy tool wrapper for Prokka in which "Mitochondria" is the preselected
+    annotation kingdom.
+    NOTE(review): id and name are the generic "prokka"/"Prokka" while the
+    repository is published as mito-prokka; confirm this cannot clash with
+    another Prokka wrapper installed on the same Galaxy server.
+-->
+<tool id="prokka" name="Prokka" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
+    <description>Prokaryotic genome annotation</description>
+    <macros>
+        <token name="@TOOL_VERSION@">1.14.6</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">prokka</requirement>
+    </requirements>
+
+    <version_command>prokka --version</version_command>
+    <!-- Assembles the prokka call: optional flags are emitted only when the matching input is set,
+         and the input dataset path is single-quoted like every other path passed to the tool. -->
+    <command detect_errors="exit_code"><![CDATA[
+prokka
+--cpus \${GALAXY_SLOTS:-8}
+--quiet ## to avoid non-error messages written to stderr
+--outdir outdir --prefix prokka ## used in outputs section
+#if $locustag
+    --locustag '$locustag'
+#end if
+#if str($increment)
+    --increment $increment
+#end if
+--gffver $gffver
+#if $compliant.compliant_select == "no"
+    #if $compliant.addgenes
+        --addgenes
+    #end if
+    #if str($compliant.mincontig)
+        --mincontiglen $compliant.mincontig
+    #end if
+#else
+    --compliant
+#end if
+#if $centre
+    --centre '$centre'
+#end if
+#if $genus
+    --genus '$genus'
+#end if
+#if $species
+    --species '$species'
+#end if
+#if $strain
+    --strain '$strain'
+#end if
+#if $plasmid
+    --plasmid '$plasmid'
+#end if
+--kingdom $kingdom.kingdom_select
+#if str($kingdom.gcode)
+    --gcode $kingdom.gcode
+#end if
+#if $usegenus
+    --usegenus
+#end if
+#if $proteins
+    --proteins '$proteins'
+#end if
+#if $metagenome
+    --metagenome
+#end if
+#if $fast
+    --fast
+#end if
+#if str($evalue)
+    --evalue $evalue
+#end if
+#if $rfam
+    --rfam
+#end if
+#if $norrna
+    --norrna
+#end if
+#if $notrna
+    --notrna
+#end if
+'$input'
+    ]]></command>
+    <!-- Parameters declared with "argument" are read in the command block under the flag's name without the leading dashes. -->
+    <inputs>
+        <param name="input" type="data" format="fasta" label="Contigs to annotate" help="FASTA format" />
+        <param argument="--locustag" type="text" value="" label="Locus tag prefix" />
+        <param argument="--increment" type="integer" value="1" min="1" optional="true" label="Locus tag counter increment" />
+        <param argument="--gffver" type="select" label="GFF version">
+            <option value="3">3</option>
+            <option value="2">2</option>
+            <option value="1">1</option>
+        </param>
+        <conditional name="compliant">
+            <param name="compliant_select" type="select" label="Force GenBank/ENA/DDBJ compliance"
+                   help="Equivalent to --addgenes --mincontiglen 200 --centre Prokka (or other centre specified below) (--compliant)">
+                <option value="no">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="no">
+                <param name="addgenes" type="boolean" checked="false" label="Add 'gene' features for each 'CDS' feature (--addgenes)" />
+                <param name="mincontig" type="integer" value="200" optional="true" label="Minimum contig size (--mincontiglen)" help="NCBI needs 200" />
+            </when>
+            <when value="yes" />
+        </conditional>
+        <param argument="--centre" type="text" value="" label="Sequencing centre ID" />
+        <param argument="--genus" type="text" value="" label="Genus name" help="May be used to aid annotation, see --usegenus below" />
+        <param argument="--species" type="text" value="" label="Species name" />
+        <param argument="--strain" type="text" value="" label="Strain name" />
+        <param argument="--plasmid" type="text" value="" label="Plasmid name or identifier" />
+        <!-- Annotation mode; every branch offers its own default for the genetic code, and Mitochondria is preselected. -->
+        <conditional name="kingdom">
+            <param name="kingdom_select" type="select" label="Kingdom" help="(--kingdom)">
+                <option value="Archaea">Archaea</option>
+                <option value="Bacteria">Bacteria</option>
+                <option value="Mitochondria" selected="true">Mitochondria</option>
+                <option value="Viruses">Viruses</option>
+            </param>
+            <when value="Archaea">
+                <param argument="--gcode" type="integer" value="11" min="1" max="23" optional="true" label="Genetic code (transl_table)" /><!-- max should be 25, but prodigal would crash -->
+            </when>
+            <when value="Bacteria">
+                <param argument="--gcode" type="integer" value="11" min="1" max="23" optional="true" label="Genetic code (transl_table)" /><!-- max should be 25, but prodigal would crash -->
+                <!-- <param name="gram" type="select" display="radio" label="Gram (- -gram)">
+                    <option selected="true" value="none">N/A</option>
+                    <option value="pos">positive</option>
+                    <option value="neg">negative</option>
+                </param> SignalP is not FOSS -->
+            </when>
+            <when value="Mitochondria">
+                <param argument="--gcode" type="integer" value="1" min="1" max="23" optional="true" label="Genetic code (transl_table)" /><!-- max should be 25, but prodigal would crash -->
+            </when>
+            <when value="Viruses">
+                <param argument="--gcode" type="integer" value="1" min="1" max="23" optional="true" label="Genetic code (transl_table)" /><!-- max should be 25, but prodigal would crash -->
+            </when>
+        </conditional>
+        <param argument="--usegenus" type="boolean" checked="false" label="Use genus-specific BLAST database" help="Will use the BLAST database for the genus specified above, if installed" />
+        <param argument="--proteins" type="data" format="fasta" optional="true" label="Optional FASTA file of trusted proteins to first annotate from" />
+        <param argument="--metagenome" type="boolean" checked="false" label="Improve gene predictions for highly fragmented genomes" help="Will set --meta option for Prodigal" />
+        <param argument="--fast" type="boolean" checked="false" label="Fast mode" help="Skip CDS /product searching" />
+        <param argument="--evalue" type="float" value="1e-06" min="0" optional="true" label="Similarity e-value cut-off" />
+        <param argument="--rfam" type="boolean" checked="false" label="Enable searching for ncRNAs with Infernal+Rfam (SLOW!)" />
+        <param argument="--norrna" type="boolean" checked="false" label="Don't run rRNA search with Barrnap" />
+        <param argument="--notrna" type="boolean" checked="false" label="Don't run tRNA search with Aragorn" />
+
+        <!-- Optional result files, all preselected; each value is tested by a filter in the outputs section. -->
+        <param name="outputs" type="select" multiple="true" display="checkboxes" label="Additional outputs">
+            <option value="gff" selected="true">Annotation in GFF3 format, containing both sequences and annotations (.gff)</option>
+            <option value="gbk" selected="true">Standard GenBank file. If the input was a multi-FASTA, then this will be a multi-GenBank, with one record for each sequence (.gbk)</option>
+            <option value="fna" selected="true">Nucleotide FASTA file of the input contig sequences (.fna)</option>
+            <option value="faa" selected="true">Protein FASTA file of the translated CDS sequences (.faa)</option>
+            <option value="ffn" selected="true">Nucleotide FASTA file of all the annotated sequences, not just CDS (.ffn)</option>
+            <option value="sqn" selected="true">An ASN1 format "Sequin" file for submission to GenBank. It needs to be edited to set the correct taxonomy, authors, related publication, etc. (.sqn)</option>
+            <option value="fsa" selected="true">Nucleotide FASTA file of the input contig sequences, with extra Sequin tags in the sequence description lines (.fsa)</option>
+            <option value="tbl" selected="true">Feature Table file (.tbl)</option>
+            <option value="tsv" selected="true">Annotations in tabular format including COGs etc.</option>
+            <option value="err" selected="true">Unacceptable annotations - the NCBI discrepancy report (.err)</option>
+            <option value="txt" selected="true">Statistics relating to the annotated features found (.txt)</option>
+        </param>
+
+    </inputs>
+    <!-- Results are collected from outdir/prokka.*, matching the output directory and prefix set in the command block; only the log is unconditional. -->
+    <outputs>
+        <data name="out_gff" format="gff" label="${tool.name} on ${on_string}: gff" from_work_dir="outdir/prokka.gff">
+            <filter>outputs and 'gff' in outputs</filter>
+        </data>
+        <data name="out_gbk" format="genbank" label="${tool.name} on ${on_string}: gbk" from_work_dir="outdir/prokka.gbk">
+            <filter>outputs and 'gbk' in outputs</filter>
+        </data>
+        <data name="out_fna" format="fasta" label="${tool.name} on ${on_string}: fna" from_work_dir="outdir/prokka.fna">
+            <filter>outputs and 'fna' in outputs</filter>
+        </data>
+        <data name="out_faa" format="fasta" label="${tool.name} on ${on_string}: faa" from_work_dir="outdir/prokka.faa">
+            <filter>outputs and 'faa' in outputs</filter>
+        </data>
+        <data name="out_ffn" format="fasta" label="${tool.name} on ${on_string}: ffn" from_work_dir="outdir/prokka.ffn">
+            <filter>outputs and 'ffn' in outputs</filter>
+        </data>
+        <data name="out_sqn" format="asn1" label="${tool.name} on ${on_string}: sqn" from_work_dir="outdir/prokka.sqn">
+            <filter>outputs and 'sqn' in outputs</filter>
+        </data>
+        <data name="out_fsa" format="fasta" label="${tool.name} on ${on_string}: fsa" from_work_dir="outdir/prokka.fsa">
+            <filter>outputs and 'fsa' in outputs</filter>
+        </data>
+        <data name="out_tbl" format="txt" label="${tool.name} on ${on_string}: tbl" from_work_dir="outdir/prokka.tbl">
+            <filter>outputs and 'tbl' in outputs</filter>
+        </data>
+        <data name="out_tsv" format="tabular" label="${tool.name} on ${on_string}: tsv" from_work_dir="outdir/prokka.tsv">
+            <filter>outputs and 'tsv' in outputs</filter>
+        </data>
+        <data name="out_err" format="txt" label="${tool.name} on ${on_string}: err" from_work_dir="outdir/prokka.err">
+            <filter>outputs and 'err' in outputs</filter>
+        </data>
+        <data name="out_txt" format="txt" label="${tool.name} on ${on_string}: txt" from_work_dir="outdir/prokka.txt">
+            <filter>outputs and 'txt' in outputs</filter>
+        </data>
+        <data name="out_log" format="txt" label="${tool.name} on ${on_string}: log" from_work_dir="outdir/prokka.log" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" ftype="fasta" value="phiX174.fasta" />
+            <param name="outputs" value="gff,gbk,fna,faa,ffn,sqn,fsa,tbl,tsv,err,txt" />
+            <!-- The expected out.fsa carries [gcode=11] on every contig, i.e. it was not produced with the
+                 Mitochondria default (genetic code 1); pin the Bacteria branch, whose default is 11. -->
+            <conditional name="kingdom">
+                <param name="kingdom_select" value="Bacteria" />
+            </conditional>
+            <output name="out_gff" file="out.gff" />
+            <output name="out_gbk" >
+                <assert_contents>
+                    <has_text_matching expression="LOCUS" />
+                    <has_text_matching expression="//" />
+                </assert_contents>
+            </output>
+            <output name="out_fna" file="out.fna" />
+            <output name="out_faa" file="out.faa" />
+            <output name="out_ffn" file="out.ffn" />
+            <output name="out_sqn" >
+                <assert_contents>
+                    <has_text_matching expression="Seq-entry" />
+                    <has_text_matching expression="contig2" />
+                </assert_contents>
+            </output>
+            <output name="out_fsa" file="out.fsa" />
+            <output name="out_tbl" file="out.tbl" />
+            <output name="out_tsv" file="out.tsv" />
+            <output name="out_err" file="out.err" lines_diff="14" />
+            <output name="out_txt" file="out.txt" />
+            <output name="out_log">
+                <assert_contents>
+                    <has_text text="Type 'prokka --citation' for more details." />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+Prokka_ is a software tool to rapidly annotate bacterial, archaeal and viral genomes, and produce output files that require only minor tweaking to submit to GenBank/ENA/DDBJ.
+
+.. _Prokka: http://github.com/tseemann/prokka
+
+**Output files**
+
+Prokka creates several output files, which are described in the **Additional outputs** section above.
+
+**License and citation**
+
+This Galaxy tool is Copyright © 2013 Lionel Guy, © 2013-2014 `CRS4 Srl.`_, © 2015-2016 `Earlham Institute`_, 2018 `Galaxy IUC` and is released under the `MIT license`_.
+
+.. _CRS4 Srl.: http://www.crs4.it/
+.. _Earlham Institute: http://earlham.ac.uk/
+.. _MIT license: https://opensource.org/licenses/MIT
+
+You can use this tool only if you agree to the license terms of: `Prokka`_.
+
+.. _Prokka: http://github.com/tseemann/prokka
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btu135</citation>
+        <citation type="doi">10.1093/bioinformatics/btu153</citation>
+    </citations>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mito-prokka/test-data/out.err Wed May 26 16:01:54 2021 +0000 @@ -0,0 +1,144 @@ +Discrepancy Report Results + +Summary +DISC_PROTEIN_NAMES:All proteins have same name "hypothetical protein" +FATAL: MISSING_PROTEIN_ID:5 proteins have invalid IDs. +DISC_SOURCE_QUALS_ASNDISC:strain (all present, all same) +DISC_SOURCE_QUALS_ASNDISC:taxname (all present, all same) +DISC_FEATURE_COUNT:CDS: 5 present +DISC_COUNT_NUCLEOTIDES:7 nucleotide Bioseqs are present +FEATURE_LOCATION_CONFLICT:5 features have inconsistent gene locations. +NO_ANNOTATION:3 bioseqs have no features +DISC_QUALITY_SCORES:Quality scores are missing on all sequences. +ONCALLER_COMMENT_PRESENT:7 comment descriptors were found (all same) +SHORT_PROT_SEQUENCES:2 protein sequences are shorter than 50 aa. +MISSING_GENOMEASSEMBLY_COMMENTS:7 bioseqs are missing GenomeAssembly structured comments +MOLTYPE_NOT_MRNA:7 molecule types are not set as mRNA. +TECHNIQUE_NOT_TSA:7 technique are not set as TSA +MISSING_STRUCTURED_COMMENT:7 sequences do not include structured comments. +MISSING_PROJECT:12 sequences do not include project. +DISC_INCONSISTENT_MOLINFO_TECH:Molinfo Technique Report (some missing, all same) + + +Detailed Report + +DiscRep_ALL:DISC_PROTEIN_NAMES::All proteins have same name "hypothetical protein" + +FATAL: DiscRep_ALL:MISSING_PROTEIN_ID::5 proteins have invalid IDs. 
+outdir/prokka:contig2_1 (length 152) +outdir/prokka:contig2_2 (length 38) +outdir/prokka:contig4_1 (length 287) +outdir/prokka:contig5_1 (length 43) +outdir/prokka:contig6_1 (length 253) + +DiscRep_ALL:DISC_SOURCE_QUALS_ASNDISC::strain (all present, all same) +DiscRep_SUB:DISC_SOURCE_QUALS_ASNDISC::7 sources have 'strain' for strain +DiscRep_ALL:DISC_SOURCE_QUALS_ASNDISC::taxname (all present, all same) +DiscRep_SUB:DISC_SOURCE_QUALS_ASNDISC::7 sources have 'Genus species' for taxname +DiscRep_ALL:DISC_FEATURE_COUNT::CDS: 5 present +DiscRep_ALL:DISC_COUNT_NUCLEOTIDES::7 nucleotide Bioseqs are present +outdir/prokka:contig1 (length 350) +outdir/prokka:contig2 (length 840) +outdir/prokka:contig3 (length 210) +outdir/prokka:contig4 (length 1260) +outdir/prokka:contig5 (length 490) +outdir/prokka:contig6 (length 1960) +outdir/prokka:contig7 (length 276) + +DiscRep_ALL:FEATURE_LOCATION_CONFLICT::5 features have inconsistent gene locations. +DiscRep_SUB:FEATURE_LOCATION_CONFLICT::Coding region xref gene does not exist +outdir/prokka:CDS hypothetical protein contig2:40-498 HMJLFLJH_00001 + +DiscRep_SUB:FEATURE_LOCATION_CONFLICT::Coding region xref gene does not exist +outdir/prokka:CDS hypothetical protein contig2:498-614 HMJLFLJH_00002 + +DiscRep_SUB:FEATURE_LOCATION_CONFLICT::Coding region xref gene does not exist +outdir/prokka:CDS hypothetical protein contig4:21-884 HMJLFLJH_00003 + +DiscRep_SUB:FEATURE_LOCATION_CONFLICT::Coding region xref gene does not exist +outdir/prokka:CDS hypothetical protein contig5:275-406 HMJLFLJH_00004 + +DiscRep_SUB:FEATURE_LOCATION_CONFLICT::Coding region xref gene does not exist +outdir/prokka:CDS hypothetical protein contig6:6-767 HMJLFLJH_00005 + +DiscRep_ALL:NO_ANNOTATION::3 bioseqs have no features +outdir/prokka:contig1 (length 350) +outdir/prokka:contig3 (length 210) +outdir/prokka:contig7 (length 276) + +DiscRep_ALL:DISC_QUALITY_SCORES::Quality scores are missing on all sequences. 
+ +DiscRep_ALL:ONCALLER_COMMENT_PRESENT::7 comment descriptors were found (all same) +outdir/prokka:contig1:Annotated using prokka 1.14.5 from https://github.com/tseemann/prokka +outdir/prokka:contig2:Annotated using prokka 1.14.5 from https://github.com/tseemann/prokka +outdir/prokka:contig3:Annotated using prokka 1.14.5 from https://github.com/tseemann/prokka +outdir/prokka:contig4:Annotated using prokka 1.14.5 from https://github.com/tseemann/prokka +outdir/prokka:contig5:Annotated using prokka 1.14.5 from https://github.com/tseemann/prokka +outdir/prokka:contig6:Annotated using prokka 1.14.5 from https://github.com/tseemann/prokka +outdir/prokka:contig7:Annotated using prokka 1.14.5 from https://github.com/tseemann/prokka + +DiscRep_ALL:SHORT_PROT_SEQUENCES::2 protein sequences are shorter than 50 aa. +outdir/prokka:contig2_2 (length 38) +outdir/prokka:contig5_1 (length 43) + +DiscRep_ALL:MISSING_GENOMEASSEMBLY_COMMENTS::7 bioseqs are missing GenomeAssembly structured comments +outdir/prokka:contig1 (length 350) +outdir/prokka:contig2 (length 840) +outdir/prokka:contig3 (length 210) +outdir/prokka:contig4 (length 1260) +outdir/prokka:contig5 (length 490) +outdir/prokka:contig6 (length 1960) +outdir/prokka:contig7 (length 276) + +DiscRep_ALL:MOLTYPE_NOT_MRNA::7 molecule types are not set as mRNA. +outdir/prokka:contig1 (length 350) +outdir/prokka:contig2 (length 840) +outdir/prokka:contig3 (length 210) +outdir/prokka:contig4 (length 1260) +outdir/prokka:contig5 (length 490) +outdir/prokka:contig6 (length 1960) +outdir/prokka:contig7 (length 276) + +DiscRep_ALL:TECHNIQUE_NOT_TSA::7 technique are not set as TSA +outdir/prokka:contig1 (length 350) +outdir/prokka:contig2 (length 840) +outdir/prokka:contig3 (length 210) +outdir/prokka:contig4 (length 1260) +outdir/prokka:contig5 (length 490) +outdir/prokka:contig6 (length 1960) +outdir/prokka:contig7 (length 276) + +DiscRep_ALL:MISSING_STRUCTURED_COMMENT::7 sequences do not include structured comments. 
+outdir/prokka:contig1 (length 350) +outdir/prokka:contig2 (length 840) +outdir/prokka:contig3 (length 210) +outdir/prokka:contig4 (length 1260) +outdir/prokka:contig5 (length 490) +outdir/prokka:contig6 (length 1960) +outdir/prokka:contig7 (length 276) + +DiscRep_ALL:MISSING_PROJECT::12 sequences do not include project. +outdir/prokka:contig1 (length 350) +outdir/prokka:contig2 (length 840) +outdir/prokka:contig2_1 (length 152) +outdir/prokka:contig2_2 (length 38) +outdir/prokka:contig3 (length 210) +outdir/prokka:contig4 (length 1260) +outdir/prokka:contig4_1 (length 287) +outdir/prokka:contig5 (length 490) +outdir/prokka:contig5_1 (length 43) +outdir/prokka:contig6 (length 1960) +outdir/prokka:contig6_1 (length 253) +outdir/prokka:contig7 (length 276) + +DiscRep_ALL:DISC_INCONSISTENT_MOLINFO_TECH::Molinfo Technique Report (some missing, all same) +DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::technique (all missing) +DiscRep_SUB:DISC_INCONSISTENT_MOLINFO_TECH::7 Molinfos are missing field technique +outdir/prokka:contig1 (length 350) +outdir/prokka:contig2 (length 840) +outdir/prokka:contig3 (length 210) +outdir/prokka:contig4 (length 1260) +outdir/prokka:contig5 (length 490) +outdir/prokka:contig6 (length 1960) +outdir/prokka:contig7 (length 276) +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mito-prokka/test-data/out.faa Wed May 26 16:01:54 2021 +0000 @@ -0,0 +1,20 @@ +>HMJLFLJH_00001 hypothetical protein +MSQVTEQSVRFQTALASIKLIQASAVLDLTEDDFDFLTSNKVWIATDRSRARRCVEACVY +GTLDFVGYPRFPAPVEFIAAVIAYYVHPVNIQTACLIMEGAEFTENIINGVERPVKAAEL +FAFTLRVRAGNTDVLTDAEENVRQKLRAEGVM +>HMJLFLJH_00002 hypothetical protein +MSKGKKRSGARPGRPQPLRGTKGKRKGARLWYVGGQQF +>HMJLFLJH_00003 hypothetical protein +MPDRTEANPNELNQDDARYGFRCCHLKNIWTAPLPPETELSRQMTTSTTSIDIMGLQAAY +ANLHTDQERDYFMQRYHDVISSFGGKTSYDADNRPLLVMRSNLWASGYDVDGTDQTSLGQ +FSGRVQQTYKHSVPRFFVPEHGTMFTLALVRFPPTATKEIQYLNAKGALTYTDIAGDPVL +YGNLPPREISMKDVFRSGDSSKKFKIAEGQWYRYAPSYVSPAYHLLEGFPFIQEPPSGDL +QERVLIRHHDYDQCFQSVQLLQWNSQVKFNVTVYRNLPTTRDSIMTS +>HMJLFLJH_00004 hypothetical protein +MVLLLAVLLLLLLVAPCLNCLEAVKKPPPVAFKVMCLLPITIL +>HMJLFLJH_00005 hypothetical protein +MAKAGKGLLEGTLQAGTSAVSDKLLDLVGLGGKSAADKGKDTRDYLAAAFPELNAWERAG +ADASSAGMVDAGFENQKELTKMQLDNQKEIAEMQNETQKEIAGIQSATSRQNTKDQVYAQ +NEMLAYQQKESTARVASIMENTNLSKQQQVSEIMRQMLTQAQTAGQYFTNDQIKEMTRKV +SAEVDLVHQQTQNQRYGSSHIGATAKDISNVVTDAASGVVDIFHGIDKAVADTWNNFWKD +GKADGIGSNLSRK
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mito-prokka/test-data/out.ffn Wed May 26 16:01:54 2021 +0000 @@ -0,0 +1,46 @@ +>HMJLFLJH_00001 hypothetical protein +ATGAGTCAAGTTACTGAACAATCCGTACGTTTCCAGACCGCTTTGGCCTCTATTAAGCTC +ATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTTCGATTTTCTGACGAGTAAC +AAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCTTGCGTTTAT +GGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCC +GTCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGC +GCTGAATTTACGGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTG +TTCGCGTTTACCTTGCGTGTACGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAA +AACGTGCGTCAAAAATTACGTGCGGAAGGAGTGATGTAA +>HMJLFLJH_00002 hypothetical protein +ATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGT +ACTAAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAA +>HMJLFLJH_00003 hypothetical protein +ATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGCTCGTTATGGT +TTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT +TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTAT +GCTAATTTGCATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCATGATGTTATT +TCTTCATTTGGAGGTAAAACCTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGC +TCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAACGTCGTTAGGCCAG +TTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGCCGCGTTTCTTTGTTCCTGAG +CATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGACTAAAGAGATT +CAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG +TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCG +TCTAAGAAGTTTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCT +CCTGCTTATCACCTTCTTGAAGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTG +CAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGTGTTTCCAGTCCGTTCAGTTG +TTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTTATCGCAATCTGCCGACCACT +CGCGATTCAATCATGACTTCGTGA +>HMJLFLJH_00004 hypothetical protein +TTGGTGCTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGT +TTGGAGGCGGTCAAAAAGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATA +ACAATACTGTAG +>HMJLFLJH_00005 hypothetical protein 
+ATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTT +TCTGATAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAG +GATACTCGTGATTATCTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGT +GCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGGATTTGAGAATCAAAAAGAGCTTACT +AAAATGCAACTGGACAATCAGAAAGAGATTGCCGAGATGCAAAATGAGACTCAAAAAGAG +ATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGACCAGGTATATGCACAA +AATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTATGGAA +AACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAA +GCTCAAACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTT +AGTGCTGAGGTTGACTTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCAT +ATTGGCGCTACTGCAAAGGATATTTCTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTT +GATATTTTTCATGGTATTGATAAAGCTGTTGCCGATACTTGGAACAATTTCTGGAAAGAC +GGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mito-prokka/test-data/out.fna Wed May 26 16:01:54 2021 +0000 @@ -0,0 +1,99 @@ +>contig1 +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAA +AAATTATCTTGATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGAC +TGCTGGCGGAAAATGAGAAAATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTT +GCGACCTTTCGCCATCAACTAACGATTCTGTCAAAAACTGACGCGTTGGATGAGGAGAAG +TGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTAGATATGAGTCACATTTTGTT +CATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC +>contig2 +TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAA +TCCGTACGTTTCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTG +GATTTAACCGAAGATGATTTCGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGAC +CGCTCTCGTGCTCGTCGCTGCGTTGAGGCTTGCGTTTATGGTACGCTGGACTTTGTGGGA +TACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCGTCATTGCTTATTATGTTCAT +CCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTACGGAAAACATT +ATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA +CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGT +GCGGAAGGAGTGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTC +CGCAGCCGTTGCGAGGTACTAAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTG +GTCAACAATTTTAATTGCAGGGGCTTCGGCCCCTTACTTGAGGATAAATTATGTCTAATA +TTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCATCTTGGCTTCCTTGCTGGTC +AGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGACTCCTTCGAGA +TGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA +>contig3 +CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTA +AGTTCATGAAGGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATA +TTGACCATGCCGCTTTTCTTGGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATT +TGTTTCAGGGTTATTTGAATATCTATAACA +>contig4 +ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATC +AAGATGATGCTCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGC +TTCCTCCTGAGACTGAGCTTTCTCGCCAAATGACGACTTCTACCACATCTATTGACATTA +TGGGTCTGCAAGCTGCTTATGCTAATTTGCATACTGACCAAGAACGTGATTACTTCATGC +AGCGTTACCATGATGTTATTTCTTCATTTGGAGGTAAAACCTCTTATGACGCTGACAACC +GTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTG 
+ACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC +CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGC +CTACTGCGACTAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATA +TTGCTGGCGACCCTGTTTTGTATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATG +TTTTCCGTTCTGGTGATTCGTCTAAGAAGTTTAAGATTGCTGAGGGTCAGTGGTATCGTT +ATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGAAGGCTTCCCATTCATTCAGG +AACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGT +GTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT +ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTG +AGGTTATAACGCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCG +AAGCGCGGTAGGTTTTCTGCTTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCA +TAATTCAAACTTTTTTTCTGATAAGCTGGTTCTCACTTCTGTTACTCCAGCTTCTTCGGC +ACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGT +TAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTGTCAACGCCGC +TAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC +>contig5 +CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGT +TTATCCTTTGAATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTAT +TGACGTCCTTCCCCGTACGCCGGGCAATAACGTTTATGTTGGTTTCATGGTTTGGTCTAA +CTTTACCGCTACTAAATGCCGCGGATTGGTTTCGCTGAATCAGGTTATTAAAGAGATTAT +TTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTGCTATTGCTGGCGGTATTGCT +TCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTCC +GGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT +GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGT +TTTGTTTCTG +>contig6 +GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTG +CCGTTTCTGATAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAG +GAAAGGATACTCGTGATTATCTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTG +CTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGGATTTGAGAATCAAAAAGAGC +TTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGAGATGCAAAATGAGACTCAAA +AAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGACCAGGTATATG +CACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA +TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTA 
+CTCAAGCTCAAACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCA +AGGTTAGTGCTGAGGTTGACTTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTT +CTCATATTGGCGCTACTGCAAAGGATATTTCTAATGTCGTCACTGATGCTGCTTCTGGTG +TGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGATACTTGGAACAATTTCTGGA +AAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCGTCAGGATTGA +CACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT +CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTA +AACCTGCTATTGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCC +ATAAGCAGATGGATAACCGCATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGG +GCGTTGAGTTCGATAATGGTGATATGTATGTTGACGGCCATAAGGCTGCTTCTGACGTTC +GTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGAATTGGCACAATGCTACAATG +TGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAAT +GGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC +CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGT +GTTCAAGATTGCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTGCTATTCAGCGTT +TGATGAATGCAATGCGACAGGCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCA +CGTTGGCTGACGACCGATTAGAGGCGTTTTATGATAATCCCAATGCTTTGCGTGACTATT +TTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCGCAAGGCTAATGATTCACACG +CCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGCCGTCTTCATT +TCCATGCGGTGCACTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC +GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACA +GTATGCCCATCGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGC +CTGTTGATGCTAAAGGTGAGCCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATG +TGGCTAAATACGTTAACAAAAAGTCAGATATGGACCTTGCTGCTAAAGGTCTAGGAGCTA +AAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT +>contig7 +TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAAT +GACAAATCTGTCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCC +GTTCAACCAGATATTGAAGCAGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGT +TACTGTAGCCGACGTTTTGGCGGCGCAACCTGTGACGACAAATCTGCTCAAATTTATGCG +CGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mito-prokka/test-data/out.fsa Wed May 26 16:01:54 2021 +0000 @@ -0,0 +1,99 @@ +>contig1 [gcode=11] [organism=Genus species] [strain=strain] +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAA +AAATTATCTTGATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGAC +TGCTGGCGGAAAATGAGAAAATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTT +GCGACCTTTCGCCATCAACTAACGATTCTGTCAAAAACTGACGCGTTGGATGAGGAGAAG +TGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTAGATATGAGTCACATTTTGTT +CATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC +>contig2 [gcode=11] [organism=Genus species] [strain=strain] +TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAA +TCCGTACGTTTCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTG +GATTTAACCGAAGATGATTTCGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGAC +CGCTCTCGTGCTCGTCGCTGCGTTGAGGCTTGCGTTTATGGTACGCTGGACTTTGTGGGA +TACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCGTCATTGCTTATTATGTTCAT +CCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTACGGAAAACATT +ATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA +CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGT +GCGGAAGGAGTGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTC +CGCAGCCGTTGCGAGGTACTAAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTG +GTCAACAATTTTAATTGCAGGGGCTTCGGCCCCTTACTTGAGGATAAATTATGTCTAATA +TTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCATCTTGGCTTCCTTGCTGGTC +AGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGACTCCTTCGAGA +TGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA +>contig3 [gcode=11] [organism=Genus species] [strain=strain] +CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTA +AGTTCATGAAGGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATA +TTGACCATGCCGCTTTTCTTGGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATT +TGTTTCAGGGTTATTTGAATATCTATAACA +>contig4 [gcode=11] [organism=Genus species] [strain=strain] +ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATC +AAGATGATGCTCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGC 
+TTCCTCCTGAGACTGAGCTTTCTCGCCAAATGACGACTTCTACCACATCTATTGACATTA +TGGGTCTGCAAGCTGCTTATGCTAATTTGCATACTGACCAAGAACGTGATTACTTCATGC +AGCGTTACCATGATGTTATTTCTTCATTTGGAGGTAAAACCTCTTATGACGCTGACAACC +GTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTG +ACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC +CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGC +CTACTGCGACTAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATA +TTGCTGGCGACCCTGTTTTGTATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATG +TTTTCCGTTCTGGTGATTCGTCTAAGAAGTTTAAGATTGCTGAGGGTCAGTGGTATCGTT +ATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGAAGGCTTCCCATTCATTCAGG +AACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGT +GTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT +ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTG +AGGTTATAACGCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCG +AAGCGCGGTAGGTTTTCTGCTTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCA +TAATTCAAACTTTTTTTCTGATAAGCTGGTTCTCACTTCTGTTACTCCAGCTTCTTCGGC +ACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGT +TAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTGTCAACGCCGC +TAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC +>contig5 [gcode=11] [organism=Genus species] [strain=strain] +CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGT +TTATCCTTTGAATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTAT +TGACGTCCTTCCCCGTACGCCGGGCAATAACGTTTATGTTGGTTTCATGGTTTGGTCTAA +CTTTACCGCTACTAAATGCCGCGGATTGGTTTCGCTGAATCAGGTTATTAAAGAGATTAT +TTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTGCTATTGCTGGCGGTATTGCT +TCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTCC +GGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT +GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGT +TTTGTTTCTG +>contig6 [gcode=11] [organism=Genus species] [strain=strain] +GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTG +CCGTTTCTGATAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAG +GAAAGGATACTCGTGATTATCTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTG 
+CTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGGATTTGAGAATCAAAAAGAGC +TTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGAGATGCAAAATGAGACTCAAA +AAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGACCAGGTATATG +CACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA +TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTA +CTCAAGCTCAAACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCA +AGGTTAGTGCTGAGGTTGACTTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTT +CTCATATTGGCGCTACTGCAAAGGATATTTCTAATGTCGTCACTGATGCTGCTTCTGGTG +TGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGATACTTGGAACAATTTCTGGA +AAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCGTCAGGATTGA +CACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT +CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTA +AACCTGCTATTGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCC +ATAAGCAGATGGATAACCGCATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGG +GCGTTGAGTTCGATAATGGTGATATGTATGTTGACGGCCATAAGGCTGCTTCTGACGTTC +GTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGAATTGGCACAATGCTACAATG +TGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAAT +GGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC +CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGT +GTTCAAGATTGCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTGCTATTCAGCGTT +TGATGAATGCAATGCGACAGGCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCA +CGTTGGCTGACGACCGATTAGAGGCGTTTTATGATAATCCCAATGCTTTGCGTGACTATT +TTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCGCAAGGCTAATGATTCACACG +CCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGCCGTCTTCATT +TCCATGCGGTGCACTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC +GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACA +GTATGCCCATCGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGC +CTGTTGATGCTAAAGGTGAGCCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATG +TGGCTAAATACGTTAACAAAAAGTCAGATATGGACCTTGCTGCTAAAGGTCTAGGAGCTA +AAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT +>contig7 [gcode=11] [organism=Genus species] [strain=strain] +TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAAT 
+GACAAATCTGTCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCC +GTTCAACCAGATATTGAAGCAGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGT +TACTGTAGCCGACGTTTTGGCGGCGCAACCTGTGACGACAAATCTGCTCAAATTTATGCG +CGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mito-prokka/test-data/out.gbk Wed May 26 16:01:54 2021 +0000 @@ -0,0 +1,257 @@ +LOCUS contig1 350 bp DNA linear 27-MAR-2018 +DEFINITION Genus species strain strain. +ACCESSION +VERSION +KEYWORDS . +SOURCE Genus species + ORGANISM Genus species + Unclassified. +COMMENT Annotated using prokka 1.13 from + https://github.com/tseemann/prokka. +FEATURES Location/Qualifiers + source 1..350 + /organism="Genus species" + /mol_type="genomic DNA" + /strain="strain" +ORIGIN + 1 gagttttatc gcttccatga cgcagaagtt aacactttcg gatatttctg atgagtcgaa + 61 aaattatctt gataaagcag gaattactac tgcttgttta cgaattaaat cgaagtggac + 121 tgctggcgga aaatgagaaa attcgaccta tccttgcgca gctcgagaag ctcttacttt + 181 gcgacctttc gccatcaact aacgattctg tcaaaaactg acgcgttgga tgaggagaag + 241 tggcttaata tgcttggcac gttcgtcaag gactggttta gatatgagtc acattttgtt + 301 catggtagag attctcttgt tgacatttta aaagagcgtg gattactatc +// +LOCUS contig2 840 bp DNA linear 27-MAR-2018 +DEFINITION Genus species strain strain. +ACCESSION +VERSION +KEYWORDS . +SOURCE Genus species + ORGANISM Genus species + Unclassified. +COMMENT Annotated using prokka 1.13 from + https://github.com/tseemann/prokka. 
+FEATURES Location/Qualifiers + source 1..840 + /organism="Genus species" + /mol_type="genomic DNA" + /strain="strain" + CDS 40..498 + /locus_tag="HMJLFLJH_00001" + /inference="ab initio prediction:Prodigal:2.6" + /codon_start=1 + /transl_table=11 + /product="hypothetical protein" + /translation="MSQVTEQSVRFQTALASIKLIQASAVLDLTEDDFDFLTSNKVWI + ATDRSRARRCVEACVYGTLDFVGYPRFPAPVEFIAAVIAYYVHPVNIQTACLIMEGAE + FTENIINGVERPVKAAELFAFTLRVRAGNTDVLTDAEENVRQKLRAEGVM" + CDS 498..614 + /locus_tag="HMJLFLJH_00002" + /inference="ab initio prediction:Prodigal:2.6" + /codon_start=1 + /transl_table=11 + /product="hypothetical protein" + /translation="MSKGKKRSGARPGRPQPLRGTKGKRKGARLWYVGGQQF" +ORIGIN + 1 tgagtccgat gctgttcaac cactaatagg taagaaatca tgagtcaagt tactgaacaa + 61 tccgtacgtt tccagaccgc tttggcctct attaagctca ttcaggcttc tgccgttttg + 121 gatttaaccg aagatgattt cgattttctg acgagtaaca aagtttggat tgctactgac + 181 cgctctcgtg ctcgtcgctg cgttgaggct tgcgtttatg gtacgctgga ctttgtggga + 241 taccctcgct ttcctgctcc tgttgagttt attgctgccg tcattgctta ttatgttcat + 301 cccgtcaaca ttcaaacggc ctgtctcatc atggaaggcg ctgaatttac ggaaaacatt + 361 attaatggcg tcgagcgtcc ggttaaagcc gctgaattgt tcgcgtttac cttgcgtgta + 421 cgcgcaggaa acactgacgt tcttactgac gcagaagaaa acgtgcgtca aaaattacgt + 481 gcggaaggag tgatgtaatg tctaaaggta aaaaacgttc tggcgctcgc cctggtcgtc + 541 cgcagccgtt gcgaggtact aaaggcaagc gtaaaggcgc tcgtctttgg tatgtaggtg + 601 gtcaacaatt ttaattgcag gggcttcggc cccttacttg aggataaatt atgtctaata + 661 ttcaaactgg cgccgagcgt atgccgcatg acctttccca tcttggcttc cttgctggtc + 721 agattggtcg tcttattacc atttcaacta ctccggttat cgctggcgac tccttcgaga + 781 tggacgccgt tggcgctctc cgtctttctc cattgcgtcg tggccttgct attgactcta +// +LOCUS contig3 210 bp DNA linear 27-MAR-2018 +DEFINITION Genus species strain strain. +ACCESSION +VERSION +KEYWORDS . +SOURCE Genus species + ORGANISM Genus species + Unclassified. +COMMENT Annotated using prokka 1.13 from + https://github.com/tseemann/prokka. 
+FEATURES Location/Qualifiers + source 1..210 + /organism="Genus species" + /mol_type="genomic DNA" + /strain="strain" +ORIGIN + 1 ctgtagacat ttttactttt tatgtccctc atcgtcacgt ttatggtgaa cagtggatta + 61 agttcatgaa ggatggtgtt aatgccactc ctctcccgac tgttaacact actggttata + 121 ttgaccatgc cgcttttctt ggcacgatta accctgatac caataaaatc cctaagcatt + 181 tgtttcaggg ttatttgaat atctataaca +// +LOCUS contig4 1260 bp DNA linear 27-MAR-2018 +DEFINITION Genus species strain strain. +ACCESSION +VERSION +KEYWORDS . +SOURCE Genus species + ORGANISM Genus species + Unclassified. +COMMENT Annotated using prokka 1.13 from + https://github.com/tseemann/prokka. +FEATURES Location/Qualifiers + source 1..1260 + /organism="Genus species" + /mol_type="genomic DNA" + /strain="strain" + CDS 21..884 + /locus_tag="HMJLFLJH_00003" + /inference="ab initio prediction:Prodigal:2.6" + /codon_start=1 + /transl_table=11 + /product="hypothetical protein" + /translation="MPDRTEANPNELNQDDARYGFRCCHLKNIWTAPLPPETELSRQM + TTSTTSIDIMGLQAAYANLHTDQERDYFMQRYHDVISSFGGKTSYDADNRPLLVMRSN + LWASGYDVDGTDQTSLGQFSGRVQQTYKHSVPRFFVPEHGTMFTLALVRFPPTATKEI + QYLNAKGALTYTDIAGDPVLYGNLPPREISMKDVFRSGDSSKKFKIAEGQWYRYAPSY + VSPAYHLLEGFPFIQEPPSGDLQERVLIRHHDYDQCFQSVQLLQWNSQVKFNVTVYRN + LPTTRDSIMTS" +ORIGIN + 1 actattttaa agcgccgtgg atgcctgacc gtaccgaggc taaccctaat gagcttaatc + 61 aagatgatgc tcgttatggt ttccgttgct gccatctcaa aaacatttgg actgctccgc + 121 ttcctcctga gactgagctt tctcgccaaa tgacgacttc taccacatct attgacatta + 181 tgggtctgca agctgcttat gctaatttgc atactgacca agaacgtgat tacttcatgc + 241 agcgttacca tgatgttatt tcttcatttg gaggtaaaac ctcttatgac gctgacaacc + 301 gtcctttact tgtcatgcgc tctaatctct gggcatctgg ctatgatgtt gatggaactg + 361 accaaacgtc gttaggccag ttttctggtc gtgttcaaca gacctataaa cattctgtgc + 421 cgcgtttctt tgttcctgag catggcacta tgtttactct tgcgcttgtt cgttttccgc + 481 ctactgcgac taaagagatt cagtacctta acgctaaagg tgctttgact tataccgata + 541 ttgctggcga ccctgttttg tatggcaact tgccgccgcg tgaaatttct atgaaggatg + 601 ttttccgttc 
tggtgattcg tctaagaagt ttaagattgc tgagggtcag tggtatcgtt + 661 atgcgccttc gtatgtttct cctgcttatc accttcttga aggcttccca ttcattcagg + 721 aaccgccttc tggtgatttg caagaacgcg tacttattcg ccaccatgat tatgaccagt + 781 gtttccagtc cgttcagttg ttgcagtgga atagtcaggt taaatttaat gtgaccgttt + 841 atcgcaatct gccgaccact cgcgattcaa tcatgacttc gtgataaaag attgagtgtg + 901 aggttataac gccgaagcgg taaaaatttt aatttttgcc gctgaggggt tgaccaagcg + 961 aagcgcggta ggttttctgc ttaggagttt aatcatgttt cagactttta tttctcgcca + 1021 taattcaaac tttttttctg ataagctggt tctcacttct gttactccag cttcttcggc + 1081 acctgtttta cagacaccta aagctacatc gtcaacgtta tattttgata gtttgacggt + 1141 taatgctggt aatggtggtt ttcttcattg cattcagatg gatacatctg tcaacgccgc + 1201 taatcaggtt gtttctgttg gtgctgatat tgcttttgat gccgacccta aattttttgc +// +LOCUS contig5 490 bp DNA linear 27-MAR-2018 +DEFINITION Genus species strain strain. +ACCESSION +VERSION +KEYWORDS . +SOURCE Genus species + ORGANISM Genus species + Unclassified. +COMMENT Annotated using prokka 1.13 from + https://github.com/tseemann/prokka. 
+FEATURES Location/Qualifiers + source 1..490 + /organism="Genus species" + /mol_type="genomic DNA" + /strain="strain" + CDS 275..406 + /locus_tag="HMJLFLJH_00004" + /inference="ab initio prediction:Prodigal:2.6" + /codon_start=1 + /transl_table=11 + /product="hypothetical protein" + /translation="MVLLLAVLLLLLLVAPCLNCLEAVKKPPPVAFKVMCLLPITIL" +ORIGIN + 1 ctgtttggtt cgctttgagt cttcttcggt tccgactacc ctcccgactg cctatgatgt + 61 ttatcctttg aatggtcgcc atgatggtgg ttattatacc gtcaaggact gtgtgactat + 121 tgacgtcctt ccccgtacgc cgggcaataa cgtttatgtt ggtttcatgg tttggtctaa + 181 ctttaccgct actaaatgcc gcggattggt ttcgctgaat caggttatta aagagattat + 241 ttgtctccag ccacttaagt gaggtgattt atgtttggtg ctattgctgg cggtattgct + 301 tctgctcttg ctggtggcgc catgtctaaa ttgtttggag gcggtcaaaa agccgcctcc + 361 ggtggcattc aaggtgatgt gcttgctacc gataacaata ctgtaggcat gggtgatgct + 421 ggtattaaat ctgccattca aggctctaat gttcctaacc ctgatgaggc cgcccctagt + 481 tttgtttctg +// +LOCUS contig6 1960 bp DNA linear 27-MAR-2018 +DEFINITION Genus species strain strain. +ACCESSION +VERSION +KEYWORDS . +SOURCE Genus species + ORGANISM Genus species + Unclassified. +COMMENT Annotated using prokka 1.13 from + https://github.com/tseemann/prokka. 
+FEATURES Location/Qualifiers + source 1..1960 + /organism="Genus species" + /mol_type="genomic DNA" + /strain="strain" + CDS 6..767 + /locus_tag="HMJLFLJH_00005" + /inference="ab initio prediction:Prodigal:2.6" + /codon_start=1 + /transl_table=11 + /product="hypothetical protein" + /translation="MAKAGKGLLEGTLQAGTSAVSDKLLDLVGLGGKSAADKGKDTRD + YLAAAFPELNAWERAGADASSAGMVDAGFENQKELTKMQLDNQKEIAEMQNETQKEIA + GIQSATSRQNTKDQVYAQNEMLAYQQKESTARVASIMENTNLSKQQQVSEIMRQMLTQ + AQTAGQYFTNDQIKEMTRKVSAEVDLVHQQTQNQRYGSSHIGATAKDISNVVTDAASG + VVDIFHGIDKAVADTWNNFWKDGKADGIGSNLSRK" +ORIGIN + 1 gtgctatggc taaagctggt aaaggacttc ttgaaggtac gttgcaggct ggcacttctg + 61 ccgtttctga taagttgctt gatttggttg gacttggtgg caagtctgcc gctgataaag + 121 gaaaggatac tcgtgattat cttgctgctg catttcctga gcttaatgct tgggagcgtg + 181 ctggtgctga tgcttcctct gctggtatgg ttgacgccgg atttgagaat caaaaagagc + 241 ttactaaaat gcaactggac aatcagaaag agattgccga gatgcaaaat gagactcaaa + 301 aagagattgc tggcattcag tcggcgactt cacgccagaa tacgaaagac caggtatatg + 361 cacaaaatga gatgcttgct tatcaacaga aggagtctac tgctcgcgtt gcgtctatta + 421 tggaaaacac caatctttcc aagcaacagc aggtttccga gattatgcgc caaatgctta + 481 ctcaagctca aacggctggt cagtatttta ccaatgacca aatcaaagaa atgactcgca + 541 aggttagtgc tgaggttgac ttagttcatc agcaaacgca gaatcagcgg tatggctctt + 601 ctcatattgg cgctactgca aaggatattt ctaatgtcgt cactgatgct gcttctggtg + 661 tggttgatat ttttcatggt attgataaag ctgttgccga tacttggaac aatttctgga + 721 aagacggtaa agctgatggt attggctcta atttgtctag gaaataaccg tcaggattga + 781 caccctccca attgtatgtt ttcatgcctc caaatcttgg aggctttttt atggttcgtt + 841 cttattaccc ttctgaatgt cacgctgatt attttgactt tgagcgtatc gaggctctta + 901 aacctgctat tgaggcttgt ggcatttcta ctctttctca atccccaatg cttggcttcc + 961 ataagcagat ggataaccgc atcaagctct tggaagagat tctgtctttt cgtatgcagg + 1021 gcgttgagtt cgataatggt gatatgtatg ttgacggcca taaggctgct tctgacgttc + 1081 gtgatgagtt tgtatctgtt actgagaagt taatggatga attggcacaa tgctacaatg + 1141 tgctccccca acttgatatt aataacacta tagaccaccg 
ccccgaaggg gacgaaaaat + 1201 ggtttttaga gaacgagaag acggttacgc agttttgccg caagctggct gctgaacgcc + 1261 ctcttaagga tattcgcgat gagtataatt accccaaaaa gaaaggtatt aaggatgagt + 1321 gttcaagatt gctggaggcc tccactatga aatcgcgtag aggctttgct attcagcgtt + 1381 tgatgaatgc aatgcgacag gctcatgctg atggttggtt tatcgttttt gacactctca + 1441 cgttggctga cgaccgatta gaggcgtttt atgataatcc caatgctttg cgtgactatt + 1501 ttcgtgatat tggtcgtatg gttcttgctg ccgagggtcg caaggctaat gattcacacg + 1561 ccgactgcta tcagtatttt tgtgtgcctg agtatggtac agctaatggc cgtcttcatt + 1621 tccatgcggt gcactttatg cggacacttc ctacaggtag cgttgaccct aattttggtc + 1681 gtcgggtacg caatcgccgc cagttaaata gcttgcaaaa tacgtggcct tatggttaca + 1741 gtatgcccat cgcagttcgc tacacgcagg acgctttttc acgttctggt tggttgtggc + 1801 ctgttgatgc taaaggtgag ccgcttaaag ctaccagtta tatggctgtt ggtttctatg + 1861 tggctaaata cgttaacaaa aagtcagata tggaccttgc tgctaaaggt ctaggagcta + 1921 aagaatggaa caactcacta aaaaccaagc tgtcgctact +// +LOCUS contig7 276 bp DNA linear 27-MAR-2018 +DEFINITION Genus species strain strain. +ACCESSION +VERSION +KEYWORDS . +SOURCE Genus species + ORGANISM Genus species + Unclassified. +COMMENT Annotated using prokka 1.13 from + https://github.com/tseemann/prokka. +FEATURES Location/Qualifiers + source 1..276 + /organism="Genus species" + /mol_type="genomic DNA" + /strain="strain" +ORIGIN + 1 tcccaagaag ctgttcagaa tcagaatgag ccgcaacttc gggatgaaaa tgctcacaat + 61 gacaaatctg tccacggagt gcttaatcca acttaccaag ctgggttacg acgcgacgcc + 121 gttcaaccag atattgaagc agaacgcaaa aagagagatg agattgaggc tgggaaaagt + 181 tactgtagcc gacgttttgg cggcgcaacc tgtgacgaca aatctgctca aatttatgcg + 241 cgcttcgata aaaatgattg gcgtatccaa cctgca +//
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mito-prokka/test-data/out.gff Wed May 26 16:01:54 2021 +0000 @@ -0,0 +1,113 @@ +##gff-version 3 +##sequence-region contig1 1 350 +##sequence-region contig2 1 840 +##sequence-region contig3 1 210 +##sequence-region contig4 1 1260 +##sequence-region contig5 1 490 +##sequence-region contig6 1 1960 +##sequence-region contig7 1 276 +contig2 Prodigal:002006 CDS 40 498 . + 0 ID=HMJLFLJH_00001;inference=ab initio prediction:Prodigal:002006;locus_tag=HMJLFLJH_00001;product=hypothetical protein +contig2 Prodigal:002006 CDS 498 614 . + 0 ID=HMJLFLJH_00002;inference=ab initio prediction:Prodigal:002006;locus_tag=HMJLFLJH_00002;product=hypothetical protein +contig4 Prodigal:002006 CDS 21 884 . + 0 ID=HMJLFLJH_00003;inference=ab initio prediction:Prodigal:002006;locus_tag=HMJLFLJH_00003;product=hypothetical protein +contig5 Prodigal:002006 CDS 275 406 . + 0 ID=HMJLFLJH_00004;inference=ab initio prediction:Prodigal:002006;locus_tag=HMJLFLJH_00004;product=hypothetical protein +contig6 Prodigal:002006 CDS 6 767 . 
+ 0 ID=HMJLFLJH_00005;inference=ab initio prediction:Prodigal:002006;locus_tag=HMJLFLJH_00005;product=hypothetical protein +##FASTA +>contig1 +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAA +AAATTATCTTGATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGAC +TGCTGGCGGAAAATGAGAAAATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTT +GCGACCTTTCGCCATCAACTAACGATTCTGTCAAAAACTGACGCGTTGGATGAGGAGAAG +TGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTAGATATGAGTCACATTTTGTT +CATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC +>contig2 +TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAA +TCCGTACGTTTCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTG +GATTTAACCGAAGATGATTTCGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGAC +CGCTCTCGTGCTCGTCGCTGCGTTGAGGCTTGCGTTTATGGTACGCTGGACTTTGTGGGA +TACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCGTCATTGCTTATTATGTTCAT +CCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTACGGAAAACATT +ATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA +CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGT +GCGGAAGGAGTGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTC +CGCAGCCGTTGCGAGGTACTAAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTG +GTCAACAATTTTAATTGCAGGGGCTTCGGCCCCTTACTTGAGGATAAATTATGTCTAATA +TTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCATCTTGGCTTCCTTGCTGGTC +AGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGACTCCTTCGAGA +TGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA +>contig3 +CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTA +AGTTCATGAAGGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATA +TTGACCATGCCGCTTTTCTTGGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATT +TGTTTCAGGGTTATTTGAATATCTATAACA +>contig4 +ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATC +AAGATGATGCTCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGC +TTCCTCCTGAGACTGAGCTTTCTCGCCAAATGACGACTTCTACCACATCTATTGACATTA +TGGGTCTGCAAGCTGCTTATGCTAATTTGCATACTGACCAAGAACGTGATTACTTCATGC +AGCGTTACCATGATGTTATTTCTTCATTTGGAGGTAAAACCTCTTATGACGCTGACAACC +GTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTG 
+ACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC +CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGC +CTACTGCGACTAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATA +TTGCTGGCGACCCTGTTTTGTATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATG +TTTTCCGTTCTGGTGATTCGTCTAAGAAGTTTAAGATTGCTGAGGGTCAGTGGTATCGTT +ATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGAAGGCTTCCCATTCATTCAGG +AACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGT +GTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT +ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTG +AGGTTATAACGCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCG +AAGCGCGGTAGGTTTTCTGCTTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCA +TAATTCAAACTTTTTTTCTGATAAGCTGGTTCTCACTTCTGTTACTCCAGCTTCTTCGGC +ACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGT +TAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTGTCAACGCCGC +TAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC +>contig5 +CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGT +TTATCCTTTGAATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTAT +TGACGTCCTTCCCCGTACGCCGGGCAATAACGTTTATGTTGGTTTCATGGTTTGGTCTAA +CTTTACCGCTACTAAATGCCGCGGATTGGTTTCGCTGAATCAGGTTATTAAAGAGATTAT +TTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTGCTATTGCTGGCGGTATTGCT +TCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTCC +GGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT +GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGT +TTTGTTTCTG +>contig6 +GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTG +CCGTTTCTGATAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAG +GAAAGGATACTCGTGATTATCTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTG +CTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGGATTTGAGAATCAAAAAGAGC +TTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGAGATGCAAAATGAGACTCAAA +AAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGACCAGGTATATG +CACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA +TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTA 
+CTCAAGCTCAAACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCA +AGGTTAGTGCTGAGGTTGACTTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTT +CTCATATTGGCGCTACTGCAAAGGATATTTCTAATGTCGTCACTGATGCTGCTTCTGGTG +TGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGATACTTGGAACAATTTCTGGA +AAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCGTCAGGATTGA +CACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT +CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTA +AACCTGCTATTGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCC +ATAAGCAGATGGATAACCGCATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGG +GCGTTGAGTTCGATAATGGTGATATGTATGTTGACGGCCATAAGGCTGCTTCTGACGTTC +GTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGAATTGGCACAATGCTACAATG +TGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAAT +GGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC +CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGT +GTTCAAGATTGCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTGCTATTCAGCGTT +TGATGAATGCAATGCGACAGGCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCA +CGTTGGCTGACGACCGATTAGAGGCGTTTTATGATAATCCCAATGCTTTGCGTGACTATT +TTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCGCAAGGCTAATGATTCACACG +CCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGCCGTCTTCATT +TCCATGCGGTGCACTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC +GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACA +GTATGCCCATCGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGC +CTGTTGATGCTAAAGGTGAGCCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATG +TGGCTAAATACGTTAACAAAAAGTCAGATATGGACCTTGCTGCTAAAGGTCTAGGAGCTA +AAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT +>contig7 +TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAAT +GACAAATCTGTCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCC +GTTCAACCAGATATTGAAGCAGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGT +TACTGTAGCCGACGTTTTGGCGGCGCAACCTGTGACGACAAATCTGCTCAAATTTATGCG +CGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mito-prokka/test-data/out.sqn Wed May 26 16:01:54 2021 +0000 @@ -0,0 +1,836 @@ +Seq-entry ::= set { + class genbank , + seq-set { + seq { + id { + local + str "contig1" } , + descr { + source { + org { + taxname "Genus species" , + orgname { + mod { + { + subtype strain , + subname "strain" } } , + gcode 11 } } } , + molinfo { + biomol genomic } , + comment "Annotated using prokka 1.13 from + https://github.com/tseemann/prokka" , + user { + type + str "NcbiCleanup" , + data { + { + label + str "method" , + data + str "SeriousSeqEntryCleanup" } , + { + label + str "version" , + data + int 8 } , + { + label + str "month" , + data + int 3 } , + { + label + str "day" , + data + int 27 } , + { + label + str "year" , + data + int 2018 } } } , + create-date + std { + year 2018 , + month 3 , + day 27 } } , + inst { + repr raw , + mol dna , + length 350 , + seq-data + iupacna "GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGA +AAAATTATCTTGATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAG +AAAATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTGTCAAA +AACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTAGATATGAGTCACA +TTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC" } } , + set { + class nuc-prot , + descr { + source { + org { + taxname "Genus species" , + orgname { + mod { + { + subtype strain , + subname "strain" } } , + gcode 11 } } } , + comment "Annotated using prokka 1.13 from + https://github.com/tseemann/prokka" , + user { + type + str "NcbiCleanup" , + data { + { + label + str "method" , + data + str "SeriousSeqEntryCleanup" } , + { + label + str "version" , + data + int 8 } , + { + label + str "month" , + data + int 3 } , + { + label + str "day" , + data + int 27 } , + { + label + str "year" , + data + int 2018 } } } , + create-date + std { + year 2018 , + month 3 , + day 27 } } , + seq-set { + seq { + id { + local + str "contig2" } , + descr { + 
molinfo { + biomol genomic } } , + inst { + repr raw , + mol dna , + length 840 , + seq-data + iupacna "TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTG +AACAATCCGTACGTTTCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAG +ATGATTTCGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCTT +GCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCGTCATTGCTT +ATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTACGGAAAACATTATTAATG +GCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTACGCGCAGGAAACACTGACGTTCTTA +CTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCGGAAGGAGTGATGTAATGTCTAAAGGTAAAAAACGTTCTGG +CGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACTAAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGG +TCAACAATTTTAATTGCAGGGGCTTCGGCCCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCG +TATGCCGCATGACCTTTCCCATCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGT +TATCGCTGGCGACTCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGA +CTCTA" } } , + seq { + id { + local + str "contig2_1" } , + descr { + title "hypothetical protein HMJLFLJH_00001 [Genus species]" , + molinfo { + biomol peptide , + tech concept-trans } } , + inst { + repr raw , + mol aa , + length 152 , + seq-data + ncbieaa "MSQVTEQSVRFQTALASIKLIQASAVLDLTEDDFDFLTSNKVWIATDRSRARRCV +EACVYGTLDFVGYPRFPAPVEFIAAVIAYYVHPVNIQTACLIMEGAEFTENIINGVERPVKAAELFAFTLRVRAGNTD +VLTDAEENVRQKLRAEGVM" } , + annot { + { + data + ftable { + { + id + local + id 3 , + data + prot { + name { + "hypothetical protein" } } , + location + int { + from 0 , + to 151 , + id + local + str "contig2_1" } } } } } } , + seq { + id { + local + str "contig2_2" } , + descr { + title "hypothetical protein HMJLFLJH_00002 [Genus species]" , + molinfo { + biomol peptide , + tech concept-trans } } , + inst { + repr raw , + mol aa , + length 38 , + seq-data + ncbieaa "MSKGKKRSGARPGRPQPLRGTKGKRKGARLWYVGGQQF" } , + annot { + { + data + ftable { + { + id + local + id 4 , + data + prot { + name { + "hypothetical protein" } } , + location + 
int { + from 0 , + to 37 , + id + local + str "contig2_2" } } } } } } } , + annot { + { + data + ftable { + { + id + local + id 1 , + data + cdregion { + frame one , + code { + id 11 } } , + product + whole + local + str "contig2_1" , + location + int { + from 39 , + to 497 , + strand plus , + id + local + str "contig2" } , + qual { + { + qual "inference" , + val "ab initio prediction:Prodigal:2.6" } } , + xref { + { + data + gene { + locus-tag "HMJLFLJH_00001" } } } } , + { + id + local + id 2 , + data + cdregion { + frame one , + code { + id 11 } } , + product + whole + local + str "contig2_2" , + location + int { + from 497 , + to 613 , + strand plus , + id + local + str "contig2" } , + qual { + { + qual "inference" , + val "ab initio prediction:Prodigal:2.6" } } , + xref { + { + data + gene { + locus-tag "HMJLFLJH_00002" } } } } } } } } , + seq { + id { + local + str "contig3" } , + descr { + source { + org { + taxname "Genus species" , + orgname { + mod { + { + subtype strain , + subname "strain" } } , + gcode 11 } } } , + molinfo { + biomol genomic } , + comment "Annotated using prokka 1.13 from + https://github.com/tseemann/prokka" , + user { + type + str "NcbiCleanup" , + data { + { + label + str "method" , + data + str "SeriousSeqEntryCleanup" } , + { + label + str "version" , + data + int 8 } , + { + label + str "month" , + data + int 3 } , + { + label + str "day" , + data + int 27 } , + { + label + str "year" , + data + int 2018 } } } , + create-date + std { + year 2018 , + month 3 , + day 27 } } , + inst { + repr raw , + mol dna , + length 210 , + seq-data + iupacna "CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATT +AAGTTCATGAAGGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTT +CTTGGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA" } } , + set { + class nuc-prot , + descr { + source { + org { + taxname "Genus species" , + orgname { + mod { + { + subtype strain , + subname "strain" } } , + gcode 11 } } } , 
+ comment "Annotated using prokka 1.13 from + https://github.com/tseemann/prokka" , + user { + type + str "NcbiCleanup" , + data { + { + label + str "method" , + data + str "SeriousSeqEntryCleanup" } , + { + label + str "version" , + data + int 8 } , + { + label + str "month" , + data + int 3 } , + { + label + str "day" , + data + int 27 } , + { + label + str "year" , + data + int 2018 } } } , + create-date + std { + year 2018 , + month 3 , + day 27 } } , + seq-set { + seq { + id { + local + str "contig4" } , + descr { + molinfo { + biomol genomic } } , + inst { + repr raw , + mol dna , + length 1260 , + seq-data + iupacna "ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCT +TAATCAAGATGATGCTCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGAC +TGAGCTTTCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGCA +TACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCATGATGTTATTTCTTCATTTGGAGGTAAAACCTCTTATGA +CGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAAC +GTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGCCGCGTTTCTTTGTTCCTGAGCATGG +CACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGACTAAAGAGATTCAGTACCTTAACGCTAAAGGTGC +TTTGACTTATACCGATATTGCTGGCGACCCTGTTTTGTATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGT +TTTCCGTTCTGGTGATTCGTCTAAGAAGTTTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTC +TCCTGCTTATCACCTTCTTGAAGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTAT +TCGCCACCATGATTATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGAC +CGTTTATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAACGCC +GAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGCTTAGGAGTTTA +ATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGTTCTCACTTCTGTTACTCCA +GCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGTTAATGCT +GGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTGTCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCT +GATATTGCTTTTGATGCCGACCCTAAATTTTTTGC" } } , + seq { + id { + local + str "contig4_1" } , + descr { + title 
"hypothetical protein HMJLFLJH_00003 [Genus species]" , + molinfo { + biomol peptide , + tech concept-trans } } , + inst { + repr raw , + mol aa , + length 287 , + seq-data + ncbieaa "MPDRTEANPNELNQDDARYGFRCCHLKNIWTAPLPPETELSRQMTTSTTSIDIMG +LQAAYANLHTDQERDYFMQRYHDVISSFGGKTSYDADNRPLLVMRSNLWASGYDVDGTDQTSLGQFSGRVQQTYKHSV +PRFFVPEHGTMFTLALVRFPPTATKEIQYLNAKGALTYTDIAGDPVLYGNLPPREISMKDVFRSGDSSKKFKIAEGQW +YRYAPSYVSPAYHLLEGFPFIQEPPSGDLQERVLIRHHDYDQCFQSVQLLQWNSQVKFNVTVYRNLPTTRDSIMTS" } , + annot { + { + data + ftable { + { + id + local + id 6 , + data + prot { + name { + "hypothetical protein" } } , + location + int { + from 0 , + to 286 , + id + local + str "contig4_1" } } } } } } } , + annot { + { + data + ftable { + { + id + local + id 5 , + data + cdregion { + frame one , + code { + id 11 } } , + product + whole + local + str "contig4_1" , + location + int { + from 20 , + to 883 , + strand plus , + id + local + str "contig4" } , + qual { + { + qual "inference" , + val "ab initio prediction:Prodigal:2.6" } } , + xref { + { + data + gene { + locus-tag "HMJLFLJH_00003" } } } } } } } } , + set { + class nuc-prot , + descr { + source { + org { + taxname "Genus species" , + orgname { + mod { + { + subtype strain , + subname "strain" } } , + gcode 11 } } } , + comment "Annotated using prokka 1.13 from + https://github.com/tseemann/prokka" , + user { + type + str "NcbiCleanup" , + data { + { + label + str "method" , + data + str "SeriousSeqEntryCleanup" } , + { + label + str "version" , + data + int 8 } , + { + label + str "month" , + data + int 3 } , + { + label + str "day" , + data + int 27 } , + { + label + str "year" , + data + int 2018 } } } , + create-date + std { + year 2018 , + month 3 , + day 27 } } , + seq-set { + seq { + id { + local + str "contig5" } , + descr { + molinfo { + biomol genomic } } , + inst { + repr raw , + mol dna , + length 490 , + seq-data + iupacna "CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTAT 
+GATGTTTATCCTTTGAATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCC +CGTACGCCGGGCAATAACGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGTT +TCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTGCTATTGCTG +GCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTCCGGTGGCA +TTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCTGGTATTAAATCTGCCATTCAAGGCT +CTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG" } } , + seq { + id { + local + str "contig5_1" } , + descr { + title "hypothetical protein HMJLFLJH_00004 [Genus species]" , + molinfo { + biomol peptide , + tech concept-trans } } , + inst { + repr raw , + mol aa , + length 43 , + seq-data + ncbieaa "MVLLLAVLLLLLLVAPCLNCLEAVKKPPPVAFKVMCLLPITIL" } , + annot { + { + data + ftable { + { + id + local + id 8 , + data + prot { + name { + "hypothetical protein" } } , + location + int { + from 0 , + to 42 , + id + local + str "contig5_1" } } } } } } } , + annot { + { + data + ftable { + { + id + local + id 7 , + data + cdregion { + frame one , + code { + id 11 } } , + product + whole + local + str "contig5_1" , + location + int { + from 274 , + to 405 , + strand plus , + id + local + str "contig5" } , + qual { + { + qual "inference" , + val "ab initio prediction:Prodigal:2.6" } } , + xref { + { + data + gene { + locus-tag "HMJLFLJH_00004" } } } } } } } } , + set { + class nuc-prot , + descr { + source { + org { + taxname "Genus species" , + orgname { + mod { + { + subtype strain , + subname "strain" } } , + gcode 11 } } } , + comment "Annotated using prokka 1.13 from + https://github.com/tseemann/prokka" , + user { + type + str "NcbiCleanup" , + data { + { + label + str "method" , + data + str "SeriousSeqEntryCleanup" } , + { + label + str "version" , + data + int 8 } , + { + label + str "month" , + data + int 3 } , + { + label + str "day" , + data + int 27 } , + { + label + str "year" , + data + int 2018 } } } , + create-date + std { + year 2018 , + month 3 , + day 27 } } , + 
seq-set { + seq { + id { + local + str "contig6" } , + descr { + molinfo { + biomol genomic } } , + inst { + repr raw , + mol dna , + length 1960 , + seq-data + iupacna "GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCAC +TTCTGCCGTTTCTGATAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCG +TGATTATCTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGGT +TGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGAGATGCAAAA +TGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGACCAGGTATATGCACAAAA +TGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTATGGAAAACACCAATCTTTCCAAGCA +ACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCAAACGGCTGGTCAGTATTTTACCAATGACCAAAT +CAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGACTTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTC +TCATATTGGCGCTACTGCAAAGGATATTTCTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGG +TATTGATAAAGCTGTTGCCGATACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTC +TAGGAAATAACCGTCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGT +TCGTTCTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTATTGA +GGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGCATCAAGCTCTT +GGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATGTTGACGGCCATAAGGCTGC +TTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGAATTGGCACAATGCTACAATGTGCTCCC +CCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAGACGGT +TACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCCCTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAA +AGGTATTAAGGATGAGTGTTCAAGATTGCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTGCTATTCAGCGTTT +GATGAATGCAATGCGACAGGCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATT +AGAGGCGTTTTATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGG +TCGCAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGCCGTCT +TCATTTCCATGCGGTGCACTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTCGTCGGGTACGCAA +TCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCATCGCAGTTCGCTACACGCAGGA 
+CGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAGCCGCTTAAAGCTACCAGTTATATGGCTGT +TGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATATGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATG +GAACAACTCACTAAAAACCAAGCTGTCGCTACT" } } , + seq { + id { + local + str "contig6_1" } , + descr { + title "hypothetical protein HMJLFLJH_00005 [Genus species]" , + molinfo { + biomol peptide , + tech concept-trans } } , + inst { + repr raw , + mol aa , + length 253 , + seq-data + ncbieaa "MAKAGKGLLEGTLQAGTSAVSDKLLDLVGLGGKSAADKGKDTRDYLAAAFPELNA +WERAGADASSAGMVDAGFENQKELTKMQLDNQKEIAEMQNETQKEIAGIQSATSRQNTKDQVYAQNEMLAYQQKESTA +RVASIMENTNLSKQQQVSEIMRQMLTQAQTAGQYFTNDQIKEMTRKVSAEVDLVHQQTQNQRYGSSHIGATAKDISNV +VTDAASGVVDIFHGIDKAVADTWNNFWKDGKADGIGSNLSRK" } , + annot { + { + data + ftable { + { + id + local + id 10 , + data + prot { + name { + "hypothetical protein" } } , + location + int { + from 0 , + to 252 , + id + local + str "contig6_1" } } } } } } } , + annot { + { + data + ftable { + { + id + local + id 9 , + data + cdregion { + frame one , + code { + id 11 } } , + product + whole + local + str "contig6_1" , + location + int { + from 5 , + to 766 , + strand plus , + id + local + str "contig6" } , + qual { + { + qual "inference" , + val "ab initio prediction:Prodigal:2.6" } } , + xref { + { + data + gene { + locus-tag "HMJLFLJH_00005" } } } } } } } } , + seq { + id { + local + str "contig7" } , + descr { + source { + org { + taxname "Genus species" , + orgname { + mod { + { + subtype strain , + subname "strain" } } , + gcode 11 } } } , + molinfo { + biomol genomic } , + comment "Annotated using prokka 1.13 from + https://github.com/tseemann/prokka" , + user { + type + str "NcbiCleanup" , + data { + { + label + str "method" , + data + str "SeriousSeqEntryCleanup" } , + { + label + str "version" , + data + int 8 } , + { + label + str "month" , + data + int 3 } , + { + label + str "day" , + data + int 27 } , + { + label + str "year" , + data + int 2018 } } } , + create-date + std { + year 2018 , + month 3 , + day 27 
} } , + inst { + repr raw , + mol dna , + length 276 , + seq-data + iupacna "TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAA +TGACAAATCTGTCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGA +AGCAGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACCTGTGA +CGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA" } } } }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mito-prokka/test-data/out.tbl Wed May 26 16:01:54 2021 +0000 @@ -0,0 +1,27 @@ +>Feature contig1 +>Feature contig2 +40 498 CDS + inference ab initio prediction:Prodigal:002006 + locus_tag HMJLFLJH_00001 + product hypothetical protein +498 614 CDS + inference ab initio prediction:Prodigal:002006 + locus_tag HMJLFLJH_00002 + product hypothetical protein +>Feature contig3 +>Feature contig4 +21 884 CDS + inference ab initio prediction:Prodigal:002006 + locus_tag HMJLFLJH_00003 + product hypothetical protein +>Feature contig5 +275 406 CDS + inference ab initio prediction:Prodigal:002006 + locus_tag HMJLFLJH_00004 + product hypothetical protein +>Feature contig6 +6 767 CDS + inference ab initio prediction:Prodigal:002006 + locus_tag HMJLFLJH_00005 + product hypothetical protein +>Feature contig7
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mito-prokka/test-data/out.tsv Wed May 26 16:01:54 2021 +0000 @@ -0,0 +1,6 @@ +locus_tag ftype length_bp gene EC_number COG product +HMJLFLJH_00001 CDS 459 hypothetical protein +HMJLFLJH_00002 CDS 117 hypothetical protein +HMJLFLJH_00003 CDS 864 hypothetical protein +HMJLFLJH_00004 CDS 132 hypothetical protein +HMJLFLJH_00005 CDS 762 hypothetical protein
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mito-prokka/test-data/out.txt Wed May 26 16:01:54 2021 +0000 @@ -0,0 +1,4 @@ +organism: Genus species strain +contigs: 7 +bases: 5386 +CDS: 5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mito-prokka/test-data/phiX174.fasta Wed May 26 16:01:54 2021 +0000 @@ -0,0 +1,85 @@ +>contig1 +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT +GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA +ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG +TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA +GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC +>contig2 +TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT +TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT +CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT +TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG +TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC +GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA +CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCGGAAGGAG +TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT +AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC +CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA +TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC +TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA +>contig3 +CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA +GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT +GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA +>contig4 +ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC +TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT +TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC +ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCATGATGTTATTTCTTCATTTGGAGGTAAAAC +CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT 
+GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC +CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC +TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG +TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT +TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA +AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT +TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT +ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC +GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC +TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT +TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA +TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG +TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC +>contig5 +CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG +AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC +CGGGCAATAACGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT +TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG +CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA +AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT +GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG +>contig6 +GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA +TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT +CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG +TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA +GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC +CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA +TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA 
+AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC +TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT +CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA +TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG +TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT +CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT +TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC +ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG +TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA +ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG +GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC +CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT +GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTGCTATTCAGCGTTTGATGAATGCAATGCGACAG +GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT +ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG +CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC +CGTCTTCATTTCCATGCGGTGCACTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC +GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT +CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG +CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA +TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT +>contig7 +TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG +TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC +AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC +TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA +