Mercurial > repos > iuc > fastoma
changeset 0:c71024206080 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/fastoma commit 97621157bc93d4c8b6a9c86c27557b67f8737ca9
| author | iuc |
|---|---|
| date | Fri, 26 Dec 2025 09:46:34 +0000 |
| parents | |
| children | |
| files | fastoma.xml macros.xml test-data/db/test.h5 test-data/expected-species-tree.nwk test-data/omamer.loc test-data/species_tree.nwk test-data/test-proteomes/AQUAE.fa test-data/test-proteomes/CHLTR.fa test-data/test-proteomes/MYCGE.fa tool-data/omamer.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
| diffstat | 12 files changed, 454 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastoma.xml Fri Dec 26 09:46:34 2025 +0000 @@ -0,0 +1,169 @@ +<tool id="fastoma" name="FastOMA" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05"> + <description>Infer orthology relationships</description> + <macros> + <import>macros.xml</import> + </macros> + <xrefs> + <xref type="bio.tools">fastoma</xref> + </xrefs> + <requirements> + <expand macro="requirements" /> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + #import re + mkdir -p input/proteome && + + #for $proteome in $proteomes: + #set $safename = re.sub('[^\w\-_\.]', '_', $proteome.element_identifier) + ln -s '$proteome' 'input/proteome/${safename}' && + #end for + ln -s '$species_tree' input/species_tree.nwk + && + printf '%s\n' \ + 'process {' \ + ' resourceLimits = [' \ + " cpus: \${GALAXY_SLOTS}," \ + " memory: \${GALAXY_MEMORY_MB:-8192}.MB," \ + ' time: 72.h' \ + ' ]' \ + '}' > galaxy.conf + && + + nextflow run dessimozlab/FastOMA -r v@TOOL_VERSION@ -latest + -c galaxy.conf + -ansi-log false + --input input + --output_folder results + --omamer_db '$database.fields.path' + #if $report + --report + #end if + #if $force_pairwise_ortholog_generation + --force_pairwise_ortholog_generation + #end if + ]]></command> + <inputs> + <param name="proteomes" type="data" multiple="true" format="fasta" label="Protein sequences of proteome"/> + <param name="species_tree" type="data" format="newick" label="Species tree in newick format" help="The species tree must contain all species present in the proteome files"/> + <param name="database" label="OMAmer database" type="select"> + <options from_data_table="omamer"/> + </param> + <param argument="--report" type="boolean" truevalue="--report" falsevalue="" label="Generate report" help="create a report on FastOMA run"/> + <param argument="--force_pairwise_ortholog_generation" type="boolean" truevalue="--force_pairwise_ortholog_generation" falsevalue="" label="Generate pairwise orthologs" 
help="Force generation of pairwise orthologs even if large number of species are provided (may take long time)"/> + </inputs> + <outputs> + <data format="xml" name="FastOMA_HOGs" label="Hierarchical Orthologous groups (HOGs) by ${tool.name} on ${on_string}" from_work_dir="results/FastOMA_HOGs.orthoxml" /> + <data format="html" name="report_html" label="${tool.name} on ${on_string} Report (HTML)" from_work_dir="results/report.html"><filter>report</filter></data> + <data format="ipynb" name="report_ipynb" label="${tool.name} on ${on_string} Report (Notebook)" from_work_dir="results/report.ipynb"><filter>report</filter></data> + <collection type="list" name="RootHOGsFasta" label="${tool.name} on ${on_string} fasta files per root-level HOGs"> + <discover_datasets pattern="(?P<name>.+)\.fa\.gz" directory="results/RootHOGsFasta/" format="fasta" visible="true"/> + </collection> + <data format="tabular" name="RootHOGsTSV" label="${tool.name} on ${on_string} protein assignments to RootHOGs" from_work_dir="results/RootHOGs.tsv" /> + <data format="tabular" name="OrthologousGroupsTSV" label="Single copy orthologous groups by ${tool.name} on ${on_string}" from_work_dir="results/OrthologousGroups.tsv" /> + <collection type="list" name="OrthologousGroupsFasta" label="Single copy orthologous groups fasta files by ${tool.name} on ${on_string}"> + <discover_datasets pattern="(?P<name>.+)\.fa\.gz" directory="results/OrthologousGroupsFasta/" format="fasta" visible="true"/> + </collection> + <data format="tabular" name="PairwiseOrthologs" label="Pairwise orthologs by ${tool.name} on ${on_string}" from_work_dir="results/orthologs.tsv.gz"><filter>force_pairwise_ortholog_generation</filter></data> + <data format="newick" name="species_tree_checked" label="Checked species tree by ${tool.name} on ${on_string}" from_work_dir="results/species_tree_checked.nwk" /> + </outputs> + <tests> + <test expect_num_outputs="9"> + <param name="proteomes" 
value="test-proteomes/AQUAE.fa,test-proteomes/CHLTR.fa,test-proteomes/MYCGE.fa"/> + <param name="species_tree" value="species_tree.nwk" ftype="newick"/> + <param name="database" value="test"/> + <param name="report" value="true"/> + <param name="force_pairwise_ortholog_generation" value="true"/> + <output name="FastOMA_HOGs"> + <assert_contents> + <is_valid_xml /> + <has_text_matching expression="<orthoXML .* origin=.FastOMA @TOOL_VERSION@" /> + <has_text_matching expression="<species " n="3" /> + <has_size size="7053" delta="300" /> + </assert_contents> + </output> + <output_collection name="RootHOGsFasta" type="list" count="12"> + <element name="HOG0000001" decompress="true"> + <assert_contents> + <has_text_matching expression="^>" /> + </assert_contents> + </element> + </output_collection> + <output_collection name="OrthologousGroupsFasta" type="list" count="12"> + <element name="OG_0000001" decompress="true"> + <assert_contents> + <has_text_matching expression="^>" /> + </assert_contents> + </element> + </output_collection> + <output name="PairwiseOrthologs" decompress="true"> + <assert_contents> + <has_line_matching expression="sp\|O66778\|ENO_AQUAE\tsp\|O84591\|ENO_CHLTR" /> + <has_n_columns n="2" /> + </assert_contents> + </output> + <output name="species_tree_checked" file="expected-species-tree.nwk" ftype="newick"/> + <output name="RootHOGsTSV"> + <assert_contents> + <has_line_matching expression="HOG:\d+\tsp\|O66429\|EFTU_AQUAE\tHOG:\d+" /> + <has_n_columns n="3" /> + </assert_contents> + </output> + <output name="OrthologousGroupsTSV"> + <assert_contents> + <has_line_matching expression="OG_\d+\tsp\|O84332\|TPIS_CHLTR" /> + <has_n_columns n="2" /> + </assert_contents> + </output> + <output name="report_ipynb"> + <assert_contents> + <has_text text="Number of RootHOGs: 12" /> + <has_text text="There are 2 taxonomic levels in the input species tree with 3 species as leaves." 
/> + <has_json_property_with_value property="execution_count" value="25" /> + </assert_contents> + </output> + <output name="report_html"> + <assert_contents> + <has_text text="Number of RootHOGs: 12" /> + <has_text text="There are 2 taxonomic levels in the input species tree with 3 species as leaves." /> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +FastOMA is a software tool to infer orthology relationships among multiple species based on their proteomes. + +The main output of FastOMA are Hierarchical Orthologous Groups (HOGs) in OrthoXML format, which represent +groups of genes that have evolved from a common ancestor. These groups are hierarchically nested, +reflecting the evolutionary relationships among the species. + +.. class:: warningmark + +The Galaxy tool of FastOMA is not intended for large scale analysis. All steps are run on a single machine, +which usually is not suitable for large datasets. For large scale analysis, please use FastOMA directly +through Nextflow as described in the documentation: https://github.com/dessimozlab/FastOMA + +**Input data** + +- Protein sequences of proteomes: Provide the protein sequences of the species in FASTA format. Each proteome should be in a separate file. + +- Species tree in Newick format: Provide a species tree that includes all species present in the proteome files. + +- OMAmer database: Select an OMAmer database from the available options. Usually it is beneficial to use the most comprehensive database available (e.g. LUCA). + +**Outputs** + +- *Hierarchical Orthologous groups (HOGs) by FastOMA*: The main output file in OrthoXML format containing the inferred HOGs. + +- *FastOMA Report*: A Jupyter notebook and HTML report summarizing the results of the FastOMA run. + +- *FastOMA fasta files per root-level HOGs*: TSV and FASTA files listing the sequences for each root-level HOG. 
+ +- *Single copy orthologous groups*: TSV and FASTA files listing orthologous groups with at most one protein per species across the provided proteomes. This data is mostly useful for species tree reconstruction. + +- *Pairwise orthologs*: TSV file listing all pairwise orthologs inferred by FastOMA. + +- *Checked species tree*: The species tree after checking and reformatting by FastOMA. Internal nodes will also be labeled if they were not labeled in the input tree. Those labels are used to name the internal HOG levels. + +]]></help> +<expand macro="citation" /> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Fri Dec 26 09:46:34 2025 +0000 @@ -0,0 +1,16 @@ +<macros> + <token name="@TOOL_VERSION@">0.5.1</token> + <token name="@VERSION_SUFFIX@">0</token> + + <xml name="requirements"> + <requirement type="package" version="@TOOL_VERSION@">fastoma</requirement> + </xml> + + <xml name="citation"> + <citations> + <citation type="doi">10.1038/s41592-024-02552-8</citation> + </citations> + </xml> + +</macros> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/expected-species-tree.nwk Fri Dec 26 09:46:34 2025 +0000 @@ -0,0 +1,1 @@ +((AQUAE:1,CHLTR:1)inter1:1,MYCGE:1)inter2:0; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/omamer.loc Fri Dec 26 09:46:34 2025 +0000 @@ -0,0 +1,6 @@ +# Tab separated with 4 columns: +# - value +# - name +# - version +# - /path/to/data +test test 2.0.2 ${__HERE__}/db/test.h5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/species_tree.nwk Fri Dec 26 09:46:34 2025 +0000 @@ -0,0 +1,1 @@ +((AQUAE,CHLTR)inter1,MYCGE)inter2; \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-proteomes/AQUAE.fa Fri Dec 26 09:46:34 2025 +0000 @@ -0,0 +1,104 @@ +>sp|O67618|LEPA_AQUAE +MEQKNVRNFCIIAHVDHGKSTLADRLLEYTGAISEREKREQLLDTLDVERERGITVKMQA +VRMFYKAKDGNTYKLHLIDTPGHVDFSYEVSRALAACEGALLLIDASQGIEAQTVANFWK +AVEQDLVIIPVINKIDLPSADVDRVKKQIEEVLGLDPEEAILASAKEGIGIEEILEAIVN +RIPPPKGDPQKPLKALIFDSYYDPYRGAVAFVRIFDGEVKPGDKIMLMSTGKEYEVTEVG +AQTPKMTKFDKLSAGDVGYIAASIKDVRDIRIGDTITHAKNPTKEPVPGFQPAKPMVYAG +IYPAEDTTYEELRDALEKYAINDAAIVYEPESSPALGMGFRVGFLGLLHMEIVQERLERE +YGVKIITTAPNVIYRVKKKFTDEVIEVRNPMDFPDNAGLIEYVEEPFVLVTIITPKEYVG +PIIQLCQEKRGIQKNMTYLDPNTVYLEYEMPLSEIIVDFHDKIKSISRGFASYDYEFIGY +RPSDLIKLTVLINKKPVDALSFIVHADRAQKFARRVAEKLRETIPRQLFEVHIQVAKGGK +VIASERIKPLRANVTAKCYGGDVTRKKKLLENQKEGKKRMKQFGKVQLPQEAFLSVLKVE +>sp|O67032|RF1_AQUAE +MLKEAYISRLDKLQEKYRKLQEELSKPEVIQDVEKYKKLSKELKELQEINELYERYKKAQ +KELKEAKELLKSSDKDLRELAEEEVNRLTEEMKKLEEELKVHLVPKDPNDTKNVILEIRA +GAGGEEAALFAADLFRMYQKYAEEKGWKVSILSSNKTGLGGYKEVIALIEGEGAYSRLKY +ESGVHRVQRVPVTESSGRIHTSTATVAVLPEVDETDIKIKPEELKIETFRASGAGGQYVN +TTETAVRITHIPTGIVVQCQDERSQFQNKQKALKILYAKLKDYYERKKQEEIAKERKEQV +GTGERSEKIRTYNFPQNRVTDHRINLTLYKLQDVLEGKLDEIIDALRAKEIEKKLELVEK +EG +>sp|O66778|ENO_AQUAE +MSRIKRVHGREVLDSRGNPTVEVEVELESGALGRAIVPSGASTGEREALELRDGDPKRYL +GKGVLKAVDNVNGVIAKALVGLEPYNQREIDQILIELDGTENKSKLGANAILGTSMAVAR +AAANELGIPLYEYLGGKFGYRLPVPLMNVINGGAHADNNLDIQEFMIVPVCGGAFREALR +AGVETFHHLKKILKEKGYSTNVGDEGGFAPNLNSSEEALDILMQAIEKAGYKPGEDILLA +LDVASSEFYENGVYKFEGKERSAEEMIEFYEKLIQKYPIISIEDPMSENDWEGWKEITKR +LGDKVQLVGDDLFTTNPKILRKGIEEGVANAILVKLNQIGTVSETLDTVMLAKERNYSAI +ISHRSGETEDTFISHLAVATNAGQIKTGSASRTDRIAKYNELLRIEERLGNGAVFWGREE +FYRFTS +>sp|O66429|EFTU_AQUAE +MAKEKFERTKEHVNVGTIGHVDHGKSTLTSAITCVLAAGLVEGGKAKCFKYEEIDKAPEE +KERGITINITHVEYETAKRHYAHVDCPGHADYIKNMITGAAQMDGAILVVSAADGPMPQT +REHVLLARQVNVPYIVVFMNKCDMVDDEELLELVELEVRELLSKYEYPGDEVPVIRGSAL +GALQELEQNSPGKWVESIKELLNAMDEYIPTPQREVDKPFLMPIEDVFSISGRGTVVTGR +VERGVLRPGDEVEIVGLREEPLKTVATSIEMFRKVLDEALPGDNIGVLLRGVGKDDVERG 
+QVLAQPGSVKAHKRFRAQVYVLSKEEGGRHTPFFVNYRPQFYFRTADVTGTVVKLPEGVE +MVMPGDNVELEVELIAPVALEEGLRFAIREGGRTVGAGVVTKILD +>sp|O67547|SUCD_AQUAE +MAILVNKDTKVVVQGITGKEGSFHAKQCKEYGTQVVAGVTPGKGGMEVEGIPVFNTVKEA +VKETGANCSLIFVPAPFAADAIVEALDAGIELVVCITEGIPVKDMMMVKDYMLKNYPNAK +LVGPNCPGVITPGEAKVGIMPGHIFKRGKIGIVSRSGTLTYEAAYQLTKYGLGQSTAVGI +GGDPVHGLTHRDVIEMFNKDPETEAILMIGEIGGTEEEEAAEYIEKEVDKPVFAYIAGIT +APPGKRMGHAGAIIMGGKGTAKAKMEALEKAGAYVIENPAKIGETVAKILKVIELEEEER +TSDAE +>sp|O66686|TPIS_AQUAE +MRRLIAANWKMNKTVKETEEYINTFLKFVEHPESREILICPPFTSLYVAGKMLQGTGVKL +GAQNCHYEKRGAFTGEISIPMLQEVGCEYVIVGHSERRHIFGESDELIHKKIVACLEMGI +RPILCVGEKKEEREAGMTFKVIETQIKLALTGVEEHTDKIDIAYEPVWAIGTGTPATPED +AVEVHTFIRNLINQLNPKNEGKTRILYGGSVNPQNAKEFMKHEEINGLLVGTASLDPESF +AKIVYSF +>sp|O67828|ATPB_AQUAE +MAEVIKGKVVQVIGPVVDVEFEGVKELPKIKDGLKTIRRAIDDRGNWFEEVLFMEVAQHI +GEHRVRAIAMGPTDGLVRGQEVEYLGGPIKIPVGKEVLGRIFNVAGQPIDEQGPVEAKEY +WPMFRNPPELVEQSTKVEILETGIKVIDLLQPIIKGGKVGLFGGAGVGKTVLMQELIHNI +ARFHEGYSVVVGVGERTREGNDLWLEMKESGVLPYTVMVYGQMNEPPGVRFRVAHTGLTM +AEYFRDVEGQDVLIFIDNIFRFVQAGAEVSTLLGRLPSAVGYQPTLNTDVGEVQERITST +KKGSITAIQAVYVPADDITDPAPWSIFAHLDATTVLTRRLAELGIYPAIDPLESTSKYLA +PEYVGEEHYEVAMEVKRILQRYKELQEIIAILGMEELSDEDKAIVNRARRIQKFLSQPFH +VAEQFTGMPGKYVKLEDTIRSFKEVLTGKYDHLPENAFYMVGTIEDVIEKAKQMGAKV +>sp|O67118|DNAK_AQUAE +MAEKKEKIIGIDLGTTNSVVSVMMGDEAVVIQNQEGSRLTPSVVSWTKEKEILVGEPAKR +RAILDPENTVYESKRFIGRKFEEVKEEAKRVSYKVVPDEKGDAAFDIPNAGKLVRPEEVG +AHVLRKLKEAAEAFLGEPVKKAVITVPAYFNERQRQATKDAGKIAGLEVVRILNEPTAAA +MAYGLHKKDNVRILVYDFGGGTFDVSILEGGEGVIEVKVTAGDTHLGGANIDERIMDWLI +EEFKKETGIDLRKDRTALQRLKEASEQAKKELSFKMETEINLPFITIDPNTNQPLHLQKK +LTRARLEEMIKDIVDRTIDIVKQALEDAKLKPSDIDEVVLVGGSTRIPLVQQRIKEFFGK +EPHKGLNPDEVVAMGAAIQAGVLAGEVKEIVLVDVTPLSLGVETYGGVMTVLIPRNTPIP +VRKCEIFTTAHDYQTEVEIHVLQGERPLAKDNKSLAKFYLTGIPPAPRGVPKIEVCFDID +ADGILHVTAKDLGTGKEQSVRVEISSGLTPEEIERIIKEAEEHAEEDRKKKELIEAKNQL +DHLVYQLEKALKEAGDKVPADVKSEAEKVIEEAKKTIETATEIEQVKQVTEKVLQVSSKM +GTTLYGEAGKQAGGGEKKDEGGEGEVEAKPVD +>sp|O67736|FOLD_AQUAE 
+MALILDGKSLSKKIREEIKKEVENFTSKGFRPPALAVILVGNDPASEIYVNNKRKACEKV +GIKSLFYHLPQDVSEEKLLGLIYELNMNEEVDGILVQLPLPKHIDQTRVILSISPEKDVD +GFHPENMGKLVAQIEDGFIPCTPLGIDILLKHYGIDVKGKDVTIVGAGFIVGRPLSLLML +WRNATVSVCHIHTKDVKKFTKEADILISATGVPHLIKEDMIKEGAVVVDVGISRLNGKIV +GDVDFERVKEKASAITPVPGGVGPMTVTALLLNTLKSYKRKFAHLISTTNP +>sp|O67161|G3P_AQUAE +MAIKVGINGFGRIGRSFFRASWGREEIEIVAINDLTDAKHLAHLLKYDSVHGIFKGSVEA +KDDSIVVDGKEIKVFAQKDPSQIPWGDLGVDVVIEATGVFRDRENASKHLQGGAKKVIIT +APAKNPDITVVLGVNEEKYNPKEHNIISNASCTTNCLAPCVKVLNEAFGVEKGYMVTVHA +YTNDQRLLDLPHKDFRRARAAAINIVPTTTGAAKAIGEVIPELKGKLDGTARRVPVPDGS +LIDLTVVVNKAPSSVEEVNEKFREAAQKYRESGKVYLKEILQYCEDPIVSTDIVGNPHSA +IFDAPLTQVIDNLVHIAAWYDNEWGYSCRLRDLVIYLAERGL +>sp|O67943|CH60_AQUAE +MAAKAIIYNEEARAKLKAGVDKLANAVKVTLGPKGREVILGKNWGTPVVTKDGVTVAKEI +ELKDKFENIGAQLVKEVASKTADVAGDGTTTATVLAQAIFHEGLRVAASGANVMEVKRGI +DKAVKKIVEELKKLSKDVKERKEIEQVATISANNDPEIGKIIADAMEEVGKDGVITVEES +KSAETTLEVVKGMQFDRGYLSPYFVTDPEKMECVLENPYILIYEKKITNVKELLPILEQV +VRSGRPLLVIAEDVEGEALATLVVNHIKGVLKACAVKAPGFGQRRKDYLGDIAVLTGGQA +ITEDLGIKLESVTLDMLGQAEKVVVDKEHTTIIGGKGDPEQIKARIEQIKRQIQETTSDY +DREKLQERLAKLSGGVAIIRVGAATEAELKEKKYRVEDAVHATKAAVEEGIVPGGGVALV +RASEALEDLKGDNHDQQLGIDIIKKAVRTPLKQIAYNAGYDGSVVLEKVIELGKEKGVSW +GFNAATGEYVDMYEAGIIDPTKVVRTAIENAASVAGTMLTAEALIADLPEEKKKDITPTD +MPELD +>sp|O66907|ATPA_AQUAE +MATLTYEEALEILRQQIKDFEPEAKMEEVGVVYYVGDGVARAYGLENVMAMEIVEFQGGQ +QGIAFNLEEDNVGIIILGSETGIEEGHIVKRTGRILDAPVGEGLVGRVIDPLGNPLDGKG +PIQFEYRSPVEKIAPGVVKRKPVHEPLQTGIKAIDAMIPIGRGQRELIIGDRATGKTTVA +IDTILAQKNSDVYCIYVAVGQKRAAIARLIELLEREGAMEYTTVVVASASDPASLQYLAP +FVGCTIGEYFRDNGKHALIIYDDLSKHAEAYRQLSLLMRRPPGREAYPGDVFYLHSRLLE +RAAKLNDDLGAGSLTALPIIETKAGDVAAYIPTNVISITDGQIYLEADLFNKGIRPAINV +GLSVSRVGGAAQIKAMKQVAGTLRLELAQFRELEAFVQFASELDKATQQQINRGLRLVEL +LKQEPYNPIPVEKQIVLIYAGTHGYLDDIPVESVRKFEKELYAYLDNERPDILKEISEKK +KLDEELEKKIKEALDAFKQKFVP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-proteomes/CHLTR.fa Fri Dec 26 09:46:34 2025 +0000 @@ -0,0 +1,84 @@ +>sp|O84067|LEPA_CHLTR +MKPYKIENIRNFSIIAHIDHGKSTIADRLLESTSTIEQREMREQLLDSMDLERERGITIK +AHPVTMTYEYEGETYELNLIDTPGHVDFSYEVSRSLAACEGALLIVDAAQGVQAQSLANV +YLALERDLEIIPVLNKIDLPAAQPEAIKKQIEEFIGLDTSNTIACSAKTGQGIPEILESI +IRLVPPPKPPQETELKALIFDSHYDPYVGIMVYVRVISGEIKKGDRITFMATKGSSFEVL +GIGAFLPEATLMEGSLRAGQVGYFIANLKKVKDVKIGDTVTTVKHPAKEPLEGFKEIKPV +VFAGIYPIDSSDFDTLKDALGRLQLNDSALTIEQENSHSLGFGFRCGFLGLLHLEIIFER +ISREFDLDIIATAPSVIYKVVLKNGKTLFIDNPTAYPDPALIEHMEEPWVHVNIITPQEY +LSNIMSLCMDKRGICLKTDMLDQHRLVLSYELPLNEIVSDFNDKLKSVTKGYGSFDYRLG +DYKKGAIIKLEILINDEAVDAFSCLVHRDKAESKGRSICEKLVDVIPPQLFKIPIQAAIN +KKIIARETIRALAKNVTAKCYGGDITRKRKLWDKQKKGKKRMKEFGKVSIPNTAFVEVLK +ME +>sp|O84026|RF1_CHLTR +MEIKVLECLKRLEEVEKQISDPNIFSNPKEYSSLSKEHARLSEIKNAHESLVATKKILQD +DKLALSTEKDPEIVAMLEEGVLVGEEAVERLSKQLENLLIPPDPDDDLSVIMELRAGTGG +DEAALFVGDCVRMYHLYAASKGWQCEVLSTSESDLGGYKEYVMGISGASVKRFLQYEAGT +HRVQRVPETETQGRVHTSAVTVAVLPEPAEDDEEVFIDEKDLRIDTFRSSGAGGQHVNVT +DSAVRITHIPSGVVVTCQDERSQHKNKAKAMRVLKARIRDAEVQKRAQEASAMRSAQVGS +GDRSERIRTYNFPQNRVTDHRIGLTLYNLDRVMEGELDMITTALVTHVHRQLFGHEETA +>sp|O84591|ENO_CHLTR +MFDVVISDIEAREILDSRGYPTLCVKVITNTGTFGEACVPSGASTGIKEALELRDKDPKR +YQGKGVLQAISNVEKVLMPALQGFSVFDQITADAIMIDADGTPNKEKLGANAILGVSLAL +AKAAANTLQRPLYRYLGGSFSHVLPCPMMNLINGGMHATNGLQFQEFMIRPISAPSLTEA +VRMGAEVFNALKKILQNRQLATGVGDEGGFAPNLASNAEALDLLLTAIETAGFTPREDIS +LALDCAASSFYNTQDKTYDGKSYADQVGILAELCEHYPIDSIEDGLAEEDFEGWKLLSET +LGDRVQLVGDDLFVTNSALIAEGIAQGLANAVLIKPNQIGTLTETAEAIRLATIQGYATI +LSHRSGETEDTTIADLAVAFNTGQIKTGSLSRSERIAKYNRLMAIEEEMGPEALFQDSNP +FSKA +>sp|P0CD71|EFTU_CHLTR +MSKETFQRNKPHINIGTIGHVDHGKTTLTAAITRALSGDGLADFRDYSSIDNTPEEKARG +ITINASHVEYETANRHYAHVDCPGHADYVKNMITGAAQMDGAILVVSATDGAMPQTKEHI +LLARQVGVPYIVVFLNKIDMISEEDAELVDLVEMELVELLEEKGYKGCPIIRGSALKALE +GDAAYIEKVRELMQAVDDNIPTPEREIDKPFLMPIEDVFSISGRGTVVTGRIERGIVKVS +DKVQLVGLRDTKETIVTGVEMFRKELPEGRAGENVGLLLRGIGKNDVERGMVVCLPNSVK 
+PHTQFKCAVYVLQKEEGGRHKPFFTGYRPQFFFRTTDVTGVVTLPEGIEMVMPGDNVEFE +VQLISPVALEEGMRFAIREGGRTIGAGTISKIIA +>tr|O84829|O84829_CHLTR +MLELLSKDLPIITQGITGKAGSFHTTQCVAYGSNFVGGVTPGKGGSQFLDLPIFDSVLEA +KQATGCRASMIFVPPPFAAEAIFEAEDAGIELIVCITEGIPIKDMLEVASLMEKSASSLI +GPNCPGVIKPGVCKIGIMPGYIHLPGKVGVVSRSGTLTYEAVWQLTQRKIGQSVCIGIGG +DPLNGTSFIDALQEFEKDSQTEAVLMIGEIGGSAEEEAADWTRQHSSKPVIAFIAGATAP +KGKRMGHAGAIISGKSGDAFSKQEALRQAGVTVVESLALIGEAVASVLKPR +>sp|O84332|TPIS_CHLTR +MFTDKETHRKPFPTWAHLLHSEPSKQFVFGNWKMNKTLTEAQTFLKSFISSDILSNPQII +TGIIPPFTLLSACQQAVSDSPIFLGAQTTHEADSGAFTGEISAPMLKDIGVDFVLIGHSE +RRHIFHEQNPVLAEKAAAAIHSGMIPVLCIGETLEEQESGATQDILLNQLTTGLSKLPEQ +ASFILAYEPVWAIGTGKVAHPDLVQETHAFCRKTIASLFSKDIAERTPILYGGSVKADNA +RSLSLCPDVNGLLVGGASLSSENFLSIIQQIDIP +>sp|P17821|DNAK_CHLTR +MSEKRKSNKIIGIDLGTTNSCVSVMEGGQPKVIASSEGTRTTPSIVAFKGGETLVGIPAK +RQAVTNPEKTLASTKRFIGRKFSEVESEIKTVPYKVAPNSKGDAVFDVEQKLYTPEEIGA +QILMKMKETAEAYLGETVTEAVITVPAYFNDSQRASTKDAGRIAGLDVKRIIPEPTAAAL +AYGIDKEGDKKIAVFDLGGGTFDISILEIGDGVFEVLSTNGDTHLGGDDFDGVIINWMLD +EFKKQEGIDLSKDNMALQRLKDAAEKAKIELSGVSSTEINQPFITIDANGPKHLALTLTR +AQFEHLASSLIERTKQPCAQALKDAKLSASDIDDVLLVGGMSRMPAVQAVVKEIFGKEPN +KGVNPDEVVAIGAAIQGGVLGGEVKDVLLLDVIPLSLGIETLGGVMTPLVERNTTIPTQK +KQIFSTAADNQPAVTIVVLQGERPMAKDNKEIGRFDLTDIPPAPRGHPQIEVTFDIDANG +ILHVSAKDAASGREQKIRIEASSGLKEDEIQQMIRDAELHKEEDKQRKEASDVKNEADGM +IFRAEKAVKDYHDKIPAELVKEIEEHIEKVRQAIKEDASTTAIKAASDELSTHMQKIGEA +MQAQSASAAASSAANAQGGPNINSEDLKKHSFSTRPPAGGSASSTDNIEDADVEIVDKPE +>sp|O84081|FOLD_CHLTR +MLLKGAPAADHILATIKENIRACSKAPGLAVVLIGNNPASEIYVNMKIKRATDLGMVSKS +YRKPSDATLSDILALIHQLNNDENIHGILVQLPLPKHLDAQAILSTITPDKDVDGLHPVN +VGKLLLGETDGFIPCTPAGIVELCKYYEIPLHGKHVVILGRSNIVGKPLAALLMQRHADT +NASVTLLHSQSEHLTEITRTADILISAIGVPLFVNKEMIAEKTVIMDVGTSRIPAANPKG +YILVGDVDFNNVVPVCRAITPVPGGVGPMTVAMLMRNTWESFLRHTS +>sp|P0CE13|G3P_CHLTR +MRIVINGFGRIGRLVLRQILKRNSPIEVVAINDLVAGDLLTYLFKYDSTHGSFAPQATFS +DGCLVMGERKVHFLAEKDVQKLPWKDLDVDVVVESTGLFVNRDDVAKHLDSGAKRVLITA +PAKGDVPTFVMGVNHQQFDPADVIISNASCTTNCLAPLAKVLLDNFGIEEGLMTTVHAAT 
+ATQSVVDGPSRKDWRGGRGAFQNIIPASTGAAKAVGLCLPELKGKLTGMAFRVPVADVSV +VDLTVKLSSATTYEAICEAVKHAANTSMKNIMYYTEEAVVSSDFIGCEYSSVFDAQAGVA +LNDRFFKLVAWYDNEIGYATRIVDLLEYVQENSK +>sp|P0C0Z7|CH60_CHLTR +MVAKNIKYNEEARKKIQKGVKTLAEAVKVTLGPKGRHVVIDKSFGSPQVTKDGVTVAKEV +ELADKHENMGAQMVKEVASKTADKAGDGTTTATVLAEAIYTEGLRNVTAGANPMDLKRGI +DKAVKVVVDQIRKISKPVQHHKEIAQVATISANNDAEIGNLIAEAMEKVGKNGSITVEEA +KGFETVLDIVEGMNFNRGYLSSYFATNPETQECVLEDALVLIYDKKISGIKDFLPVLQQV +AESGRPLLIIAEDIEGEALATLVVNRIRGGFRVCAVKAPGFGDRRKAMLEDIAILTGGQL +ISEELGMKLENANLAMLGKAKKVIVSKEDTTIVEGMGEKEALEARCESIKKQIEDSSSDY +DKEKLQERLAKLSGGVAVIRVGAATEIEMKEKKDRVDDAQHATIAAVEEGILPGGGTALI +RCIPTLEAFLPMLTNEDEQIGARIVLKALSAPLKQIAANAGKEGAIIFQQVMSRSANEGY +DALRDAYTDMLEAGILDPAKVTRSALESAASVAGLLLTTEALIAEIPEEKPAAAPAMPGA +GMDY
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-proteomes/MYCGE.fa Fri Dec 26 09:46:34 2025 +0000 @@ -0,0 +1,52 @@ +>sp|P47500|RF1_MYCGE +MDFDKQLFFNVEKIVELTEQLEKDLNKPNLSFEQIKVINKELKHKQPLIVKFKELQKLVE +NANEAEQILNNSSLKELHEEAKKELEKIKASLPSLEEEIKFLLLPVDENNQKNVIVEIRP +AAGGDESCIFLSDLFNMYKNYCTSKNWTVELNEIIPASVGINFVSFAVNGTDVFAKLKFE +SGVHRVQRVPLTEAKGRVHTSTVTVAVLPQLEEVEITINPSDLRIDTYRASGAGGQHVNR +TESAVRITHLPTGIVVACQEGKSQFSNRDKAMKMLRAKLWENAQNKQLSTQADLRKSQVG +SGERAEKIRTYNYPQNRITDHRIKLTINKLNTVILGDLDEIIEALQADEKKQQLEKFIS +>sp|P13927|EFTU_MYCGE +MAREKFDRSKPHVNVGTIGHIDHGKTTLTAAICTVLAKEGKSAATRYDEIDKAPEEKARG +ITINSAHVEYSSDKRHYAHVDCPGHADYIKNMITGAAQMDGAILVVSATDSVMPQTREHI +LLARQVGVPKMVVFLNKCDIASDEEVQELVAEEVRDLLTSYGFDGKNTPIIYGSALKALE +GDPKWEAKIHDLIKAVDEWIPTPTREVDKPFLLAIEDTMTITGRGTVVTGRVERGELKVG +QEVEIVGLKPIRKAVVTGIEMFKKELDSAMAGDNAGVLLRGVERKEVERGQVLAKPGSIK +PHKKFKAEIYALKKEEGGRHTGFLNGYRPQFYFRTTDVTGSIALAENTEMVLPGDNASIT +VELIAPIACEKGSKFSIREGGRTVGAGTVTEVLE +>sp|P47639|ATPB_MYCGE +MIKKENLTYGKVHQVIGPVVDVIFSESKQLPRVYDCLSVQLKKSELFLEATQLIGDDIVR +CIALGPTEGLARNVKVTNYNHPIEVPVGKNVLGRMFNVLGEPIDGKEPLPKKPKLSIHRN +PPAFDEQPNTVDIFETGIKVIDLLTPYVRGGKIGLFGGAGVGKTVLVQELIHNIAKEHSG +LSVFAGVGERTREGNDLYYEMIQGGVIDKTVLVFGQMNEPPGARMRVALTALTMAEYFRD +HDNQNVLLFIDNIFRFTQAGSEVSALLGRMPSAVGYQPTLAIEMGKLQERIASTKTGSIT +SVQAIYVPADDLTDPAPATTFTHLDAKTVLDRNIAALGIFPAINPLESTSRLLDPSVVGI +NHYKVALGVQNILQRFAELQDIIAILGIDELSDEDKIIVERARRIRNFLSQPFFVAEKFS +GIAGKYVSLNDTVQSFKEILEGKHDHLPEQAFFYVGTIQEAVEKAKRLNQEFDKTK +>sp|P47547|DNAK_MYCGE +MSADNGLIIGIDLGTTNSCVSVMEGGRPVVLENPEGKRTTPSIVSYKNNEIIVGDAAKRQ +MVTNPNTIVSIKRLMGTSNKVKVQNADGTTKELSPEQVSAQILSYLKDFAEKKIGKKISR +AVITVPAYFNDAERNATKTAGKIAGLNVERIINEPTAAALAYGIDKASREMKVLVYDLGG +GTFDVSLLDIAEGTFEVLATAGDNRLGGDDWDNKIIEYISAYIAKEHQGLNLSKDKMAMQ +RLKEAAERAKIELSAQLETIISLPFLTVTQKGPVNVELKLTRAKFEELTKPLLERTRNPI +SDVIKEAKIKPEEINEILLVGGSTRMPAVQKLVESMVPGKKPNRSINPDEVVAIGAAIQG +GVLRGDVKDVLLLDVTPLTLSIETLGGVATPLIKRNTTIPVSKSQIFSTAQDNQESVDVV +VCQGERPMSRDNKSLGRFNLGGIQPAPKGKPQIEITFSLDANGILNVKAKDLTTQKENSI 
+TISDNGNLSEEEIQKMIRDAEANKERDNIIRERIELRNEGEGIVNTIKEILASPDAKNFP +KEEKEKLEKLTGNIDAAIKANDYAKLKVEIENFKKWREEMAKKYNPTGEQGPQAK +>sp|P47543|G3P_MYCGE +MAAKNRTIKVAINGFGRIGRLVFRSLLSKANVEVVAINDLTQPEVLAHLLKYDSAHGELK +RKITVKQNILQIDRKKVYVFSEKDPQNLPWDEHDIDVVIESTGRFVSEEGASLHLKAGAK +RVIISAPAKEKTIRTVVYNVNHKTISSDDKIISAASCTTNCLAPLVHVLEKNFGIVYGTM +LTVHAYTADQRLQDAPHNDLRRARAAAVNIVPTTTGAAKAIGLVVPEANGKLNGMSLRVP +VLTGSIVELSVVLEKSPSVEQVNQAMKRFASASFKYCEDPIVSSDVVSSEYGSIFDSKLT +NIVEVDGMKLYKVYAWYDNESSYVHQLVRVVSYCAKL +>sp|P47641|ATPA_MYCGE +MADKLNEYVALIKTEIKKYSKKIFNSEIGQVISVADGIAKVSGLENALLNELIQFENNIQ +GIVLNLEQNTVGIALFGDYSSLREGSTAKRTHSVMKTPVGDVMLGRIVNALGEAIDGRGD +IKATEYDQIEKIAPGVMKRKSVNQPLETGILTIDALFPIGKGQRELIVGDRQTGKTAIAI +DTIINQKDKDVYCVYVAIGQKNSSVAQIVHQLEVNDSMKYTTVVCATASDSDSMVYLSPF +TGITIAEYWLKKGKDVLIVFDDLSKHAVAYRTLSLLLKRPPGREAFPGDVFYLHSRLLER +ACKLNDENGGGSITALPIIETQAGDISAYIPTNVISITDGQLFMVSSLFNAGQRPAIQIG +LSVSRVGSAAQTKAIKQQTGSLKLELAQYSELDSFSQFGSDLDENTKKVLEHGKRVMEMI +KQPNGKPYSQVHEALFLFAINKAFIKFIPVDEIAKFKQRITEEFNGSHPLFKELSNKKEF +TEDLESKTKTAFKMLVKRFISTLTDYDITKFGSIEELN
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/omamer.loc.sample Fri Dec 26 09:46:34 2025 +0000 @@ -0,0 +1,9 @@ + +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of OMAMer databases. +# +# the columns are: +#value description version /path/to/data +# +#for example +#Primates-v2.0 Primates 2.0.2 /path/to/data
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Fri Dec 26 09:46:34 2025 +0000 @@ -0,0 +1,6 @@ +<tables> + <table name="omamer" comment_char="#"> + <columns>value, name, version, path</columns> + <file path="tool-data/omamer.loc"/> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Fri Dec 26 09:46:34 2025 +0000 @@ -0,0 +1,6 @@ +<tables> + <table name="omamer" comment_char="#"> + <columns>value, name, version, path</columns> + <file path="${__HERE__}/test-data/omamer.loc"/> + </table> +</tables> \ No newline at end of file
