changeset 0:c71024206080 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/fastoma commit 97621157bc93d4c8b6a9c86c27557b67f8737ca9
author iuc
date Fri, 26 Dec 2025 09:46:34 +0000
parents
children
files fastoma.xml macros.xml test-data/db/test.h5 test-data/expected-species-tree.nwk test-data/omamer.loc test-data/species_tree.nwk test-data/test-proteomes/AQUAE.fa test-data/test-proteomes/CHLTR.fa test-data/test-proteomes/MYCGE.fa tool-data/omamer.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 12 files changed, 454 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastoma.xml	Fri Dec 26 09:46:34 2025 +0000
@@ -0,0 +1,169 @@
+<tool id="fastoma" name="FastOMA" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
+    <description>Infer orthology relationships</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <xrefs>
+        <xref type="bio.tools">fastoma</xref>
+    </xrefs>
+    <requirements>
+        <expand macro="requirements" />
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+    #import re
+    mkdir -p input/proteome  &&
+    ## Stage every proteome under a sanitised name: each character that is not a word character, dash, underscore or dot becomes an underscore.
+    #for $proteome in $proteomes:
+        #set $safename = re.sub('[^\w\-_\.]', '_', $proteome.element_identifier)
+        ln -s    '$proteome' 'input/proteome/${safename}' &&
+    #end for
+    ln -s '$species_tree' input/species_tree.nwk &&
+    ## Write the Nextflow resource limits. Arguments simply continue on the next line (as in the nextflow call below), so no backslash continuations are used; both variables have a fallback.
+    printf '%s\n'
+             'process {'
+             '  resourceLimits = ['
+             "    cpus: \${GALAXY_SLOTS:-1},"
+             "    memory: \${GALAXY_MEMORY_MB:-8192}.MB,"
+             '    time: 72.h'
+             '  ]'
+             '}' > galaxy.conf
+    &&
+    ## Run the FastOMA pipeline at the release tag matching the tool version, using the config written above.
+    nextflow run dessimozlab/FastOMA -r v@TOOL_VERSION@ -latest 
+        -c galaxy.conf
+        -ansi-log false 
+        --input  input
+        --output_folder results
+        --omamer_db '$database.fields.path' 
+    #if $report
+        --report 
+    #end if
+    #if $force_pairwise_ortholog_generation
+        --force_pairwise_ortholog_generation
+    #end if
+    ]]></command>
+    <inputs> <!-- Parameters consumed by the command template: proteomes, species_tree, database, report, force_pairwise_ortholog_generation -->
+        <param name="proteomes" type="data" multiple="true" format="fasta" label="Protein sequences of proteome"/> <!-- one or more FASTA datasets; the command links each one into input/proteome/ -->
+        <param name="species_tree" type="data" format="newick" label="Species tree in newick format" help="The species tree must contain all species present in the proteome files"/>
+        <param name="database" label="OMAmer database" type="select">
+            <options from_data_table="omamer"/> <!-- choices come from the "omamer" tool data table; the command reads the selected entry's path column -->
+        </param>
+        <param argument="--report" type="boolean" truevalue="--report" falsevalue="" label="Generate report" help="create a report on FastOMA run"/> <!-- also gates the report_html and report_ipynb outputs -->
+        <param argument="--force_pairwise_ortholog_generation" type="boolean" truevalue="--force_pairwise_ortholog_generation" falsevalue="" label="Generate pairwise orthologs" help="Force generation of pairwise orthologs even if large number of species are provided (may take long time)"/> <!-- also gates the PairwiseOrthologs output -->
+    </inputs>
+    <outputs> <!-- Results are collected from the results/ folder; gzip-compressed files are declared with the matching .gz datatypes -->
+        <data format="xml" name="FastOMA_HOGs" label="Hierarchical Orthologous groups (HOGs) by ${tool.name} on ${on_string}" from_work_dir="results/FastOMA_HOGs.orthoxml" />
+        <data format="html" name="report_html" label="${tool.name} on ${on_string} Report (HTML)" from_work_dir="results/report.html"><filter>report</filter></data>
+        <data format="ipynb" name="report_ipynb" label="${tool.name} on ${on_string} Report (Notebook)" from_work_dir="results/report.ipynb"><filter>report</filter></data>
+        <collection type="list" name="RootHOGsFasta" label="${tool.name} on ${on_string} fasta files per root-level HOGs">
+            <discover_datasets pattern="(?P&lt;name&gt;.+)\.fa\.gz" directory="results/RootHOGsFasta/" format="fasta.gz" visible="true"/>
+        </collection>
+        <data format="tabular" name="RootHOGsTSV" label="${tool.name} on ${on_string} protein assignments to RootHOGs" from_work_dir="results/RootHOGs.tsv" />
+        <data format="tabular" name="OrthologousGroupsTSV" label="Single copy orthologous groups by ${tool.name} on ${on_string}" from_work_dir="results/OrthologousGroups.tsv" />
+        <collection type="list" name="OrthologousGroupsFasta" label="Single copy orthologous groups fasta files by ${tool.name} on ${on_string}">
+            <discover_datasets pattern="(?P&lt;name&gt;.+)\.fa\.gz" directory="results/OrthologousGroupsFasta/" format="fasta.gz" visible="true"/>
+        </collection>
+        <data format="tabular.gz" name="PairwiseOrthologs" label="Pairwise orthologs by ${tool.name} on ${on_string}" from_work_dir="results/orthologs.tsv.gz"><filter>force_pairwise_ortholog_generation</filter></data>
+        <data format="newick" name="species_tree_checked" label="Checked species tree by ${tool.name} on ${on_string}" from_work_dir="results/species_tree_checked.nwk" />
+    </outputs>
+    <tests> <!-- Single end-to-end run on three small proteomes with both optional flags enabled -->
+        <test expect_num_outputs="9"> <!-- 7 datasets + 2 collections, i.e. every declared output including the filtered ones -->
+            <param name="proteomes" value="test-proteomes/AQUAE.fa,test-proteomes/CHLTR.fa,test-proteomes/MYCGE.fa"/>
+            <param name="species_tree" value="species_tree.nwk" ftype="newick"/>
+            <param name="database" value="test"/> <!-- the "test" entry of test-data/omamer.loc -->
+            <param name="report" value="true"/>
+            <param name="force_pairwise_ortholog_generation" value="true"/>
+            <output name="FastOMA_HOGs">
+                <assert_contents>
+                    <is_valid_xml />
+                    <has_text_matching expression="&lt;orthoXML .* origin=.FastOMA @TOOL_VERSION@" />
+                    <has_text_matching expression="&lt;species " n="3" /> <!-- one species element per input proteome -->
+                    <has_size size="7053" delta="300" />
+                </assert_contents>
+            </output>
+            <output_collection name="RootHOGsFasta" type="list" count="12">
+                <element name="HOG0000001" decompress="true"> <!-- assertions are applied to the decompressed content -->
+                    <assert_contents>
+                        <has_text_matching expression="^>" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="OrthologousGroupsFasta" type="list" count="12">
+                <element name="OG_0000001" decompress="true">
+                    <assert_contents>
+                        <has_text_matching expression="^>" />
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output name="PairwiseOrthologs" decompress="true">
+                <assert_contents>
+                    <has_line_matching expression="sp\|O66778\|ENO_AQUAE\tsp\|O84591\|ENO_CHLTR" />
+                    <has_n_columns n="2" />
+                </assert_contents>
+            </output>
+            <output name="species_tree_checked" file="expected-species-tree.nwk" ftype="newick"/> <!-- compared against the stored expected tree file -->
+            <output name="RootHOGsTSV">
+                <assert_contents>
+                    <has_line_matching expression="HOG:\d+\tsp\|O66429\|EFTU_AQUAE\tHOG:\d+" />
+                    <has_n_columns n="3" />
+                </assert_contents>
+            </output>
+            <output name="OrthologousGroupsTSV">
+                <assert_contents>
+                    <has_line_matching expression="OG_\d+\tsp\|O84332\|TPIS_CHLTR" />
+                    <has_n_columns n="2" />
+                </assert_contents>
+            </output>
+            <output name="report_ipynb">
+                <assert_contents>
+                    <has_text text="Number of RootHOGs: 12" />
+                    <has_text text="There are 2 taxonomic levels in the input species tree with 3 species as leaves." />
+                    <has_json_property_with_value property="execution_count" value="25" />
+                </assert_contents>
+            </output>
+            <output name="report_html">
+                <assert_contents>
+                    <has_text text="Number of RootHOGs: 12" />
+                    <has_text text="There are 2 taxonomic levels in the input species tree with 3 species as leaves." />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+FastOMA is a software tool to infer orthology relationships among multiple species based on their proteomes.
+
+The main output of FastOMA are Hierarchical Orthologous Groups (HOGs) in OrthoXML format, which represent 
+groups of genes that have evolved from a common ancestor. These groups are hierarchically nested, 
+reflecting the evolutionary relationships among the species. 
+
+.. class:: warningmark
+    
+The Galaxy tool of FastOMA is not intended for large scale analysis. All steps are run on a single machine, 
+which usually is not suitable for large datasets. For large scale analysis, please use FastOMA directly 
+through Nextflow as described in the documentation: https://github.com/dessimozlab/FastOMA
+
+**Input data**
+
+- Protein sequences of proteomes: Provide the protein sequences of the species in FASTA format. Each proteome should be in a separate file.
+
+- Species tree in Newick format: Provide a species tree that includes all species present in the proteome files.
+
+- OMAmer database: Select an OMAmer database from the available options. Usually it is beneficial to use the most comprehensive database available (e.g. LUCA). 
+
+**Outputs**
+
+- *Hierarchical Orthologous groups (HOGs) by FastOMA*: The main output file in OrthoXML format containing the inferred HOGs.
+
+- *FastOMA Report*: A Jupyter notebook and an HTML report summarizing the results of the FastOMA run.
+
+- *FastOMA fasta files per root-level HOGs*: TSV and FASTA files listing the sequences for each root-level HOG.
+
+- *Single copy orthologous groups*: TSV and FASTA files listing orthologous groups with at most one protein per species across the provided proteomes. This data is mostly useful for species tree reconstruction.
+
+- *Pairwise orthologs*: TSV file listing all pairwise orthologs inferred by FastOMA.
+
+- *Checked species tree*: The species tree after checking and reformatting by FastOMA. Internal nodes will also be labeled if they were not labeled in the input tree. Those labels are used to name the internal HOG levels.
+
+]]></help>
+<expand macro="citation" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Fri Dec 26 09:46:34 2025 +0000
@@ -0,0 +1,16 @@
+<macros> <!-- Tokens and XML snippets imported by fastoma.xml -->
+    <token name="@TOOL_VERSION@">0.5.1</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <!-- Package requirement pinned to @TOOL_VERSION@ -->
+    <xml name="requirements">
+        <requirement type="package" version="@TOOL_VERSION@">fastoma</requirement>
+    </xml>
+    <!-- Citation block referencing the publication by DOI -->
+    <xml name="citation">
+        <citations>
+            <citation type="doi">10.1038/s41592-024-02552-8</citation>
+        </citations>
+    </xml>
+
+</macros>
+
Binary file test-data/db/test.h5 has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expected-species-tree.nwk	Fri Dec 26 09:46:34 2025 +0000
@@ -0,0 +1,1 @@
+((AQUAE:1,CHLTR:1)inter1:1,MYCGE:1)inter2:0;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/omamer.loc	Fri Dec 26 09:46:34 2025 +0000
@@ -0,0 +1,6 @@
+# Tab separated with 4 columns:
+# - value
+# - name
+# - version
+# - /path/to/data
+test	test	2.0.2	${__HERE__}/db/test.h5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/species_tree.nwk	Fri Dec 26 09:46:34 2025 +0000
@@ -0,0 +1,1 @@
+((AQUAE,CHLTR)inter1,MYCGE)inter2;
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-proteomes/AQUAE.fa	Fri Dec 26 09:46:34 2025 +0000
@@ -0,0 +1,104 @@
+>sp|O67618|LEPA_AQUAE
+MEQKNVRNFCIIAHVDHGKSTLADRLLEYTGAISEREKREQLLDTLDVERERGITVKMQA
+VRMFYKAKDGNTYKLHLIDTPGHVDFSYEVSRALAACEGALLLIDASQGIEAQTVANFWK
+AVEQDLVIIPVINKIDLPSADVDRVKKQIEEVLGLDPEEAILASAKEGIGIEEILEAIVN
+RIPPPKGDPQKPLKALIFDSYYDPYRGAVAFVRIFDGEVKPGDKIMLMSTGKEYEVTEVG
+AQTPKMTKFDKLSAGDVGYIAASIKDVRDIRIGDTITHAKNPTKEPVPGFQPAKPMVYAG
+IYPAEDTTYEELRDALEKYAINDAAIVYEPESSPALGMGFRVGFLGLLHMEIVQERLERE
+YGVKIITTAPNVIYRVKKKFTDEVIEVRNPMDFPDNAGLIEYVEEPFVLVTIITPKEYVG
+PIIQLCQEKRGIQKNMTYLDPNTVYLEYEMPLSEIIVDFHDKIKSISRGFASYDYEFIGY
+RPSDLIKLTVLINKKPVDALSFIVHADRAQKFARRVAEKLRETIPRQLFEVHIQVAKGGK
+VIASERIKPLRANVTAKCYGGDVTRKKKLLENQKEGKKRMKQFGKVQLPQEAFLSVLKVE
+>sp|O67032|RF1_AQUAE
+MLKEAYISRLDKLQEKYRKLQEELSKPEVIQDVEKYKKLSKELKELQEINELYERYKKAQ
+KELKEAKELLKSSDKDLRELAEEEVNRLTEEMKKLEEELKVHLVPKDPNDTKNVILEIRA
+GAGGEEAALFAADLFRMYQKYAEEKGWKVSILSSNKTGLGGYKEVIALIEGEGAYSRLKY
+ESGVHRVQRVPVTESSGRIHTSTATVAVLPEVDETDIKIKPEELKIETFRASGAGGQYVN
+TTETAVRITHIPTGIVVQCQDERSQFQNKQKALKILYAKLKDYYERKKQEEIAKERKEQV
+GTGERSEKIRTYNFPQNRVTDHRINLTLYKLQDVLEGKLDEIIDALRAKEIEKKLELVEK
+EG
+>sp|O66778|ENO_AQUAE
+MSRIKRVHGREVLDSRGNPTVEVEVELESGALGRAIVPSGASTGEREALELRDGDPKRYL
+GKGVLKAVDNVNGVIAKALVGLEPYNQREIDQILIELDGTENKSKLGANAILGTSMAVAR
+AAANELGIPLYEYLGGKFGYRLPVPLMNVINGGAHADNNLDIQEFMIVPVCGGAFREALR
+AGVETFHHLKKILKEKGYSTNVGDEGGFAPNLNSSEEALDILMQAIEKAGYKPGEDILLA
+LDVASSEFYENGVYKFEGKERSAEEMIEFYEKLIQKYPIISIEDPMSENDWEGWKEITKR
+LGDKVQLVGDDLFTTNPKILRKGIEEGVANAILVKLNQIGTVSETLDTVMLAKERNYSAI
+ISHRSGETEDTFISHLAVATNAGQIKTGSASRTDRIAKYNELLRIEERLGNGAVFWGREE
+FYRFTS
+>sp|O66429|EFTU_AQUAE
+MAKEKFERTKEHVNVGTIGHVDHGKSTLTSAITCVLAAGLVEGGKAKCFKYEEIDKAPEE
+KERGITINITHVEYETAKRHYAHVDCPGHADYIKNMITGAAQMDGAILVVSAADGPMPQT
+REHVLLARQVNVPYIVVFMNKCDMVDDEELLELVELEVRELLSKYEYPGDEVPVIRGSAL
+GALQELEQNSPGKWVESIKELLNAMDEYIPTPQREVDKPFLMPIEDVFSISGRGTVVTGR
+VERGVLRPGDEVEIVGLREEPLKTVATSIEMFRKVLDEALPGDNIGVLLRGVGKDDVERG
+QVLAQPGSVKAHKRFRAQVYVLSKEEGGRHTPFFVNYRPQFYFRTADVTGTVVKLPEGVE
+MVMPGDNVELEVELIAPVALEEGLRFAIREGGRTVGAGVVTKILD
+>sp|O67547|SUCD_AQUAE
+MAILVNKDTKVVVQGITGKEGSFHAKQCKEYGTQVVAGVTPGKGGMEVEGIPVFNTVKEA
+VKETGANCSLIFVPAPFAADAIVEALDAGIELVVCITEGIPVKDMMMVKDYMLKNYPNAK
+LVGPNCPGVITPGEAKVGIMPGHIFKRGKIGIVSRSGTLTYEAAYQLTKYGLGQSTAVGI
+GGDPVHGLTHRDVIEMFNKDPETEAILMIGEIGGTEEEEAAEYIEKEVDKPVFAYIAGIT
+APPGKRMGHAGAIIMGGKGTAKAKMEALEKAGAYVIENPAKIGETVAKILKVIELEEEER
+TSDAE
+>sp|O66686|TPIS_AQUAE
+MRRLIAANWKMNKTVKETEEYINTFLKFVEHPESREILICPPFTSLYVAGKMLQGTGVKL
+GAQNCHYEKRGAFTGEISIPMLQEVGCEYVIVGHSERRHIFGESDELIHKKIVACLEMGI
+RPILCVGEKKEEREAGMTFKVIETQIKLALTGVEEHTDKIDIAYEPVWAIGTGTPATPED
+AVEVHTFIRNLINQLNPKNEGKTRILYGGSVNPQNAKEFMKHEEINGLLVGTASLDPESF
+AKIVYSF
+>sp|O67828|ATPB_AQUAE
+MAEVIKGKVVQVIGPVVDVEFEGVKELPKIKDGLKTIRRAIDDRGNWFEEVLFMEVAQHI
+GEHRVRAIAMGPTDGLVRGQEVEYLGGPIKIPVGKEVLGRIFNVAGQPIDEQGPVEAKEY
+WPMFRNPPELVEQSTKVEILETGIKVIDLLQPIIKGGKVGLFGGAGVGKTVLMQELIHNI
+ARFHEGYSVVVGVGERTREGNDLWLEMKESGVLPYTVMVYGQMNEPPGVRFRVAHTGLTM
+AEYFRDVEGQDVLIFIDNIFRFVQAGAEVSTLLGRLPSAVGYQPTLNTDVGEVQERITST
+KKGSITAIQAVYVPADDITDPAPWSIFAHLDATTVLTRRLAELGIYPAIDPLESTSKYLA
+PEYVGEEHYEVAMEVKRILQRYKELQEIIAILGMEELSDEDKAIVNRARRIQKFLSQPFH
+VAEQFTGMPGKYVKLEDTIRSFKEVLTGKYDHLPENAFYMVGTIEDVIEKAKQMGAKV
+>sp|O67118|DNAK_AQUAE
+MAEKKEKIIGIDLGTTNSVVSVMMGDEAVVIQNQEGSRLTPSVVSWTKEKEILVGEPAKR
+RAILDPENTVYESKRFIGRKFEEVKEEAKRVSYKVVPDEKGDAAFDIPNAGKLVRPEEVG
+AHVLRKLKEAAEAFLGEPVKKAVITVPAYFNERQRQATKDAGKIAGLEVVRILNEPTAAA
+MAYGLHKKDNVRILVYDFGGGTFDVSILEGGEGVIEVKVTAGDTHLGGANIDERIMDWLI
+EEFKKETGIDLRKDRTALQRLKEASEQAKKELSFKMETEINLPFITIDPNTNQPLHLQKK
+LTRARLEEMIKDIVDRTIDIVKQALEDAKLKPSDIDEVVLVGGSTRIPLVQQRIKEFFGK
+EPHKGLNPDEVVAMGAAIQAGVLAGEVKEIVLVDVTPLSLGVETYGGVMTVLIPRNTPIP
+VRKCEIFTTAHDYQTEVEIHVLQGERPLAKDNKSLAKFYLTGIPPAPRGVPKIEVCFDID
+ADGILHVTAKDLGTGKEQSVRVEISSGLTPEEIERIIKEAEEHAEEDRKKKELIEAKNQL
+DHLVYQLEKALKEAGDKVPADVKSEAEKVIEEAKKTIETATEIEQVKQVTEKVLQVSSKM
+GTTLYGEAGKQAGGGEKKDEGGEGEVEAKPVD
+>sp|O67736|FOLD_AQUAE
+MALILDGKSLSKKIREEIKKEVENFTSKGFRPPALAVILVGNDPASEIYVNNKRKACEKV
+GIKSLFYHLPQDVSEEKLLGLIYELNMNEEVDGILVQLPLPKHIDQTRVILSISPEKDVD
+GFHPENMGKLVAQIEDGFIPCTPLGIDILLKHYGIDVKGKDVTIVGAGFIVGRPLSLLML
+WRNATVSVCHIHTKDVKKFTKEADILISATGVPHLIKEDMIKEGAVVVDVGISRLNGKIV
+GDVDFERVKEKASAITPVPGGVGPMTVTALLLNTLKSYKRKFAHLISTTNP
+>sp|O67161|G3P_AQUAE
+MAIKVGINGFGRIGRSFFRASWGREEIEIVAINDLTDAKHLAHLLKYDSVHGIFKGSVEA
+KDDSIVVDGKEIKVFAQKDPSQIPWGDLGVDVVIEATGVFRDRENASKHLQGGAKKVIIT
+APAKNPDITVVLGVNEEKYNPKEHNIISNASCTTNCLAPCVKVLNEAFGVEKGYMVTVHA
+YTNDQRLLDLPHKDFRRARAAAINIVPTTTGAAKAIGEVIPELKGKLDGTARRVPVPDGS
+LIDLTVVVNKAPSSVEEVNEKFREAAQKYRESGKVYLKEILQYCEDPIVSTDIVGNPHSA
+IFDAPLTQVIDNLVHIAAWYDNEWGYSCRLRDLVIYLAERGL
+>sp|O67943|CH60_AQUAE
+MAAKAIIYNEEARAKLKAGVDKLANAVKVTLGPKGREVILGKNWGTPVVTKDGVTVAKEI
+ELKDKFENIGAQLVKEVASKTADVAGDGTTTATVLAQAIFHEGLRVAASGANVMEVKRGI
+DKAVKKIVEELKKLSKDVKERKEIEQVATISANNDPEIGKIIADAMEEVGKDGVITVEES
+KSAETTLEVVKGMQFDRGYLSPYFVTDPEKMECVLENPYILIYEKKITNVKELLPILEQV
+VRSGRPLLVIAEDVEGEALATLVVNHIKGVLKACAVKAPGFGQRRKDYLGDIAVLTGGQA
+ITEDLGIKLESVTLDMLGQAEKVVVDKEHTTIIGGKGDPEQIKARIEQIKRQIQETTSDY
+DREKLQERLAKLSGGVAIIRVGAATEAELKEKKYRVEDAVHATKAAVEEGIVPGGGVALV
+RASEALEDLKGDNHDQQLGIDIIKKAVRTPLKQIAYNAGYDGSVVLEKVIELGKEKGVSW
+GFNAATGEYVDMYEAGIIDPTKVVRTAIENAASVAGTMLTAEALIADLPEEKKKDITPTD
+MPELD
+>sp|O66907|ATPA_AQUAE
+MATLTYEEALEILRQQIKDFEPEAKMEEVGVVYYVGDGVARAYGLENVMAMEIVEFQGGQ
+QGIAFNLEEDNVGIIILGSETGIEEGHIVKRTGRILDAPVGEGLVGRVIDPLGNPLDGKG
+PIQFEYRSPVEKIAPGVVKRKPVHEPLQTGIKAIDAMIPIGRGQRELIIGDRATGKTTVA
+IDTILAQKNSDVYCIYVAVGQKRAAIARLIELLEREGAMEYTTVVVASASDPASLQYLAP
+FVGCTIGEYFRDNGKHALIIYDDLSKHAEAYRQLSLLMRRPPGREAYPGDVFYLHSRLLE
+RAAKLNDDLGAGSLTALPIIETKAGDVAAYIPTNVISITDGQIYLEADLFNKGIRPAINV
+GLSVSRVGGAAQIKAMKQVAGTLRLELAQFRELEAFVQFASELDKATQQQINRGLRLVEL
+LKQEPYNPIPVEKQIVLIYAGTHGYLDDIPVESVRKFEKELYAYLDNERPDILKEISEKK
+KLDEELEKKIKEALDAFKQKFVP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-proteomes/CHLTR.fa	Fri Dec 26 09:46:34 2025 +0000
@@ -0,0 +1,84 @@
+>sp|O84067|LEPA_CHLTR
+MKPYKIENIRNFSIIAHIDHGKSTIADRLLESTSTIEQREMREQLLDSMDLERERGITIK
+AHPVTMTYEYEGETYELNLIDTPGHVDFSYEVSRSLAACEGALLIVDAAQGVQAQSLANV
+YLALERDLEIIPVLNKIDLPAAQPEAIKKQIEEFIGLDTSNTIACSAKTGQGIPEILESI
+IRLVPPPKPPQETELKALIFDSHYDPYVGIMVYVRVISGEIKKGDRITFMATKGSSFEVL
+GIGAFLPEATLMEGSLRAGQVGYFIANLKKVKDVKIGDTVTTVKHPAKEPLEGFKEIKPV
+VFAGIYPIDSSDFDTLKDALGRLQLNDSALTIEQENSHSLGFGFRCGFLGLLHLEIIFER
+ISREFDLDIIATAPSVIYKVVLKNGKTLFIDNPTAYPDPALIEHMEEPWVHVNIITPQEY
+LSNIMSLCMDKRGICLKTDMLDQHRLVLSYELPLNEIVSDFNDKLKSVTKGYGSFDYRLG
+DYKKGAIIKLEILINDEAVDAFSCLVHRDKAESKGRSICEKLVDVIPPQLFKIPIQAAIN
+KKIIARETIRALAKNVTAKCYGGDITRKRKLWDKQKKGKKRMKEFGKVSIPNTAFVEVLK
+ME
+>sp|O84026|RF1_CHLTR
+MEIKVLECLKRLEEVEKQISDPNIFSNPKEYSSLSKEHARLSEIKNAHESLVATKKILQD
+DKLALSTEKDPEIVAMLEEGVLVGEEAVERLSKQLENLLIPPDPDDDLSVIMELRAGTGG
+DEAALFVGDCVRMYHLYAASKGWQCEVLSTSESDLGGYKEYVMGISGASVKRFLQYEAGT
+HRVQRVPETETQGRVHTSAVTVAVLPEPAEDDEEVFIDEKDLRIDTFRSSGAGGQHVNVT
+DSAVRITHIPSGVVVTCQDERSQHKNKAKAMRVLKARIRDAEVQKRAQEASAMRSAQVGS
+GDRSERIRTYNFPQNRVTDHRIGLTLYNLDRVMEGELDMITTALVTHVHRQLFGHEETA
+>sp|O84591|ENO_CHLTR
+MFDVVISDIEAREILDSRGYPTLCVKVITNTGTFGEACVPSGASTGIKEALELRDKDPKR
+YQGKGVLQAISNVEKVLMPALQGFSVFDQITADAIMIDADGTPNKEKLGANAILGVSLAL
+AKAAANTLQRPLYRYLGGSFSHVLPCPMMNLINGGMHATNGLQFQEFMIRPISAPSLTEA
+VRMGAEVFNALKKILQNRQLATGVGDEGGFAPNLASNAEALDLLLTAIETAGFTPREDIS
+LALDCAASSFYNTQDKTYDGKSYADQVGILAELCEHYPIDSIEDGLAEEDFEGWKLLSET
+LGDRVQLVGDDLFVTNSALIAEGIAQGLANAVLIKPNQIGTLTETAEAIRLATIQGYATI
+LSHRSGETEDTTIADLAVAFNTGQIKTGSLSRSERIAKYNRLMAIEEEMGPEALFQDSNP
+FSKA
+>sp|P0CD71|EFTU_CHLTR
+MSKETFQRNKPHINIGTIGHVDHGKTTLTAAITRALSGDGLADFRDYSSIDNTPEEKARG
+ITINASHVEYETANRHYAHVDCPGHADYVKNMITGAAQMDGAILVVSATDGAMPQTKEHI
+LLARQVGVPYIVVFLNKIDMISEEDAELVDLVEMELVELLEEKGYKGCPIIRGSALKALE
+GDAAYIEKVRELMQAVDDNIPTPEREIDKPFLMPIEDVFSISGRGTVVTGRIERGIVKVS
+DKVQLVGLRDTKETIVTGVEMFRKELPEGRAGENVGLLLRGIGKNDVERGMVVCLPNSVK
+PHTQFKCAVYVLQKEEGGRHKPFFTGYRPQFFFRTTDVTGVVTLPEGIEMVMPGDNVEFE
+VQLISPVALEEGMRFAIREGGRTIGAGTISKIIA
+>tr|O84829|O84829_CHLTR
+MLELLSKDLPIITQGITGKAGSFHTTQCVAYGSNFVGGVTPGKGGSQFLDLPIFDSVLEA
+KQATGCRASMIFVPPPFAAEAIFEAEDAGIELIVCITEGIPIKDMLEVASLMEKSASSLI
+GPNCPGVIKPGVCKIGIMPGYIHLPGKVGVVSRSGTLTYEAVWQLTQRKIGQSVCIGIGG
+DPLNGTSFIDALQEFEKDSQTEAVLMIGEIGGSAEEEAADWTRQHSSKPVIAFIAGATAP
+KGKRMGHAGAIISGKSGDAFSKQEALRQAGVTVVESLALIGEAVASVLKPR
+>sp|O84332|TPIS_CHLTR
+MFTDKETHRKPFPTWAHLLHSEPSKQFVFGNWKMNKTLTEAQTFLKSFISSDILSNPQII
+TGIIPPFTLLSACQQAVSDSPIFLGAQTTHEADSGAFTGEISAPMLKDIGVDFVLIGHSE
+RRHIFHEQNPVLAEKAAAAIHSGMIPVLCIGETLEEQESGATQDILLNQLTTGLSKLPEQ
+ASFILAYEPVWAIGTGKVAHPDLVQETHAFCRKTIASLFSKDIAERTPILYGGSVKADNA
+RSLSLCPDVNGLLVGGASLSSENFLSIIQQIDIP
+>sp|P17821|DNAK_CHLTR
+MSEKRKSNKIIGIDLGTTNSCVSVMEGGQPKVIASSEGTRTTPSIVAFKGGETLVGIPAK
+RQAVTNPEKTLASTKRFIGRKFSEVESEIKTVPYKVAPNSKGDAVFDVEQKLYTPEEIGA
+QILMKMKETAEAYLGETVTEAVITVPAYFNDSQRASTKDAGRIAGLDVKRIIPEPTAAAL
+AYGIDKEGDKKIAVFDLGGGTFDISILEIGDGVFEVLSTNGDTHLGGDDFDGVIINWMLD
+EFKKQEGIDLSKDNMALQRLKDAAEKAKIELSGVSSTEINQPFITIDANGPKHLALTLTR
+AQFEHLASSLIERTKQPCAQALKDAKLSASDIDDVLLVGGMSRMPAVQAVVKEIFGKEPN
+KGVNPDEVVAIGAAIQGGVLGGEVKDVLLLDVIPLSLGIETLGGVMTPLVERNTTIPTQK
+KQIFSTAADNQPAVTIVVLQGERPMAKDNKEIGRFDLTDIPPAPRGHPQIEVTFDIDANG
+ILHVSAKDAASGREQKIRIEASSGLKEDEIQQMIRDAELHKEEDKQRKEASDVKNEADGM
+IFRAEKAVKDYHDKIPAELVKEIEEHIEKVRQAIKEDASTTAIKAASDELSTHMQKIGEA
+MQAQSASAAASSAANAQGGPNINSEDLKKHSFSTRPPAGGSASSTDNIEDADVEIVDKPE
+>sp|O84081|FOLD_CHLTR
+MLLKGAPAADHILATIKENIRACSKAPGLAVVLIGNNPASEIYVNMKIKRATDLGMVSKS
+YRKPSDATLSDILALIHQLNNDENIHGILVQLPLPKHLDAQAILSTITPDKDVDGLHPVN
+VGKLLLGETDGFIPCTPAGIVELCKYYEIPLHGKHVVILGRSNIVGKPLAALLMQRHADT
+NASVTLLHSQSEHLTEITRTADILISAIGVPLFVNKEMIAEKTVIMDVGTSRIPAANPKG
+YILVGDVDFNNVVPVCRAITPVPGGVGPMTVAMLMRNTWESFLRHTS
+>sp|P0CE13|G3P_CHLTR
+MRIVINGFGRIGRLVLRQILKRNSPIEVVAINDLVAGDLLTYLFKYDSTHGSFAPQATFS
+DGCLVMGERKVHFLAEKDVQKLPWKDLDVDVVVESTGLFVNRDDVAKHLDSGAKRVLITA
+PAKGDVPTFVMGVNHQQFDPADVIISNASCTTNCLAPLAKVLLDNFGIEEGLMTTVHAAT
+ATQSVVDGPSRKDWRGGRGAFQNIIPASTGAAKAVGLCLPELKGKLTGMAFRVPVADVSV
+VDLTVKLSSATTYEAICEAVKHAANTSMKNIMYYTEEAVVSSDFIGCEYSSVFDAQAGVA
+LNDRFFKLVAWYDNEIGYATRIVDLLEYVQENSK
+>sp|P0C0Z7|CH60_CHLTR
+MVAKNIKYNEEARKKIQKGVKTLAEAVKVTLGPKGRHVVIDKSFGSPQVTKDGVTVAKEV
+ELADKHENMGAQMVKEVASKTADKAGDGTTTATVLAEAIYTEGLRNVTAGANPMDLKRGI
+DKAVKVVVDQIRKISKPVQHHKEIAQVATISANNDAEIGNLIAEAMEKVGKNGSITVEEA
+KGFETVLDIVEGMNFNRGYLSSYFATNPETQECVLEDALVLIYDKKISGIKDFLPVLQQV
+AESGRPLLIIAEDIEGEALATLVVNRIRGGFRVCAVKAPGFGDRRKAMLEDIAILTGGQL
+ISEELGMKLENANLAMLGKAKKVIVSKEDTTIVEGMGEKEALEARCESIKKQIEDSSSDY
+DKEKLQERLAKLSGGVAVIRVGAATEIEMKEKKDRVDDAQHATIAAVEEGILPGGGTALI
+RCIPTLEAFLPMLTNEDEQIGARIVLKALSAPLKQIAANAGKEGAIIFQQVMSRSANEGY
+DALRDAYTDMLEAGILDPAKVTRSALESAASVAGLLLTTEALIAEIPEEKPAAAPAMPGA
+GMDY
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-proteomes/MYCGE.fa	Fri Dec 26 09:46:34 2025 +0000
@@ -0,0 +1,52 @@
+>sp|P47500|RF1_MYCGE
+MDFDKQLFFNVEKIVELTEQLEKDLNKPNLSFEQIKVINKELKHKQPLIVKFKELQKLVE
+NANEAEQILNNSSLKELHEEAKKELEKIKASLPSLEEEIKFLLLPVDENNQKNVIVEIRP
+AAGGDESCIFLSDLFNMYKNYCTSKNWTVELNEIIPASVGINFVSFAVNGTDVFAKLKFE
+SGVHRVQRVPLTEAKGRVHTSTVTVAVLPQLEEVEITINPSDLRIDTYRASGAGGQHVNR
+TESAVRITHLPTGIVVACQEGKSQFSNRDKAMKMLRAKLWENAQNKQLSTQADLRKSQVG
+SGERAEKIRTYNYPQNRITDHRIKLTINKLNTVILGDLDEIIEALQADEKKQQLEKFIS
+>sp|P13927|EFTU_MYCGE
+MAREKFDRSKPHVNVGTIGHIDHGKTTLTAAICTVLAKEGKSAATRYDEIDKAPEEKARG
+ITINSAHVEYSSDKRHYAHVDCPGHADYIKNMITGAAQMDGAILVVSATDSVMPQTREHI
+LLARQVGVPKMVVFLNKCDIASDEEVQELVAEEVRDLLTSYGFDGKNTPIIYGSALKALE
+GDPKWEAKIHDLIKAVDEWIPTPTREVDKPFLLAIEDTMTITGRGTVVTGRVERGELKVG
+QEVEIVGLKPIRKAVVTGIEMFKKELDSAMAGDNAGVLLRGVERKEVERGQVLAKPGSIK
+PHKKFKAEIYALKKEEGGRHTGFLNGYRPQFYFRTTDVTGSIALAENTEMVLPGDNASIT
+VELIAPIACEKGSKFSIREGGRTVGAGTVTEVLE
+>sp|P47639|ATPB_MYCGE
+MIKKENLTYGKVHQVIGPVVDVIFSESKQLPRVYDCLSVQLKKSELFLEATQLIGDDIVR
+CIALGPTEGLARNVKVTNYNHPIEVPVGKNVLGRMFNVLGEPIDGKEPLPKKPKLSIHRN
+PPAFDEQPNTVDIFETGIKVIDLLTPYVRGGKIGLFGGAGVGKTVLVQELIHNIAKEHSG
+LSVFAGVGERTREGNDLYYEMIQGGVIDKTVLVFGQMNEPPGARMRVALTALTMAEYFRD
+HDNQNVLLFIDNIFRFTQAGSEVSALLGRMPSAVGYQPTLAIEMGKLQERIASTKTGSIT
+SVQAIYVPADDLTDPAPATTFTHLDAKTVLDRNIAALGIFPAINPLESTSRLLDPSVVGI
+NHYKVALGVQNILQRFAELQDIIAILGIDELSDEDKIIVERARRIRNFLSQPFFVAEKFS
+GIAGKYVSLNDTVQSFKEILEGKHDHLPEQAFFYVGTIQEAVEKAKRLNQEFDKTK
+>sp|P47547|DNAK_MYCGE
+MSADNGLIIGIDLGTTNSCVSVMEGGRPVVLENPEGKRTTPSIVSYKNNEIIVGDAAKRQ
+MVTNPNTIVSIKRLMGTSNKVKVQNADGTTKELSPEQVSAQILSYLKDFAEKKIGKKISR
+AVITVPAYFNDAERNATKTAGKIAGLNVERIINEPTAAALAYGIDKASREMKVLVYDLGG
+GTFDVSLLDIAEGTFEVLATAGDNRLGGDDWDNKIIEYISAYIAKEHQGLNLSKDKMAMQ
+RLKEAAERAKIELSAQLETIISLPFLTVTQKGPVNVELKLTRAKFEELTKPLLERTRNPI
+SDVIKEAKIKPEEINEILLVGGSTRMPAVQKLVESMVPGKKPNRSINPDEVVAIGAAIQG
+GVLRGDVKDVLLLDVTPLTLSIETLGGVATPLIKRNTTIPVSKSQIFSTAQDNQESVDVV
+VCQGERPMSRDNKSLGRFNLGGIQPAPKGKPQIEITFSLDANGILNVKAKDLTTQKENSI
+TISDNGNLSEEEIQKMIRDAEANKERDNIIRERIELRNEGEGIVNTIKEILASPDAKNFP
+KEEKEKLEKLTGNIDAAIKANDYAKLKVEIENFKKWREEMAKKYNPTGEQGPQAK
+>sp|P47543|G3P_MYCGE
+MAAKNRTIKVAINGFGRIGRLVFRSLLSKANVEVVAINDLTQPEVLAHLLKYDSAHGELK
+RKITVKQNILQIDRKKVYVFSEKDPQNLPWDEHDIDVVIESTGRFVSEEGASLHLKAGAK
+RVIISAPAKEKTIRTVVYNVNHKTISSDDKIISAASCTTNCLAPLVHVLEKNFGIVYGTM
+LTVHAYTADQRLQDAPHNDLRRARAAAVNIVPTTTGAAKAIGLVVPEANGKLNGMSLRVP
+VLTGSIVELSVVLEKSPSVEQVNQAMKRFASASFKYCEDPIVSSDVVSSEYGSIFDSKLT
+NIVEVDGMKLYKVYAWYDNESSYVHQLVRVVSYCAKL
+>sp|P47641|ATPA_MYCGE
+MADKLNEYVALIKTEIKKYSKKIFNSEIGQVISVADGIAKVSGLENALLNELIQFENNIQ
+GIVLNLEQNTVGIALFGDYSSLREGSTAKRTHSVMKTPVGDVMLGRIVNALGEAIDGRGD
+IKATEYDQIEKIAPGVMKRKSVNQPLETGILTIDALFPIGKGQRELIVGDRQTGKTAIAI
+DTIINQKDKDVYCVYVAIGQKNSSVAQIVHQLEVNDSMKYTTVVCATASDSDSMVYLSPF
+TGITIAEYWLKKGKDVLIVFDDLSKHAVAYRTLSLLLKRPPGREAFPGDVFYLHSRLLER
+ACKLNDENGGGSITALPIIETQAGDISAYIPTNVISITDGQLFMVSSLFNAGQRPAIQIG
+LSVSRVGSAAQTKAIKQQTGSLKLELAQYSELDSFSQFGSDLDENTKKVLEHGKRVMEMI
+KQPNGKPYSQVHEALFLFAINKAFIKFIPVDEIAKFKQRITEEFNGSHPLFKELSNKKEF
+TEDLESKTKTAFKMLVKRFISTLTDYDITKFGSIEELN
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/omamer.loc.sample	Fri Dec 26 09:46:34 2025 +0000
@@ -0,0 +1,9 @@
+
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of OMAmer databases. 
+#
+# the columns are:
+#value  name version /path/to/data
+#
+#for example
+#Primates-v2.0	Primates	2.0.2	/path/to/data
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Fri Dec 26 09:46:34 2025 +0000
@@ -0,0 +1,6 @@
+<tables> <!-- Sample registration of the "omamer" data table -->
+    <table name="omamer" comment_char="#"> <!-- lines starting with # in the .loc file are comments -->
+        <columns>value, name, version, path</columns>
+        <file path="tool-data/omamer.loc"/>
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Fri Dec 26 09:46:34 2025 +0000
@@ -0,0 +1,6 @@
+<tables> <!-- Test-time registration of the "omamer" data table -->
+    <table name="omamer" comment_char="#">
+        <columns>value, name, version, path</columns>
+        <file path="${__HERE__}/test-data/omamer.loc"/> <!-- points at the .loc file shipped in test-data -->
+    </table>
+</tables>
\ No newline at end of file