comparison fastoma.xml @ 0:c71024206080 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/fastoma commit 97621157bc93d4c8b6a9c86c27557b67f8737ca9
author iuc
date Fri, 26 Dec 2025 09:46:34 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c71024206080
1 <tool id="fastoma" name="FastOMA" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
2 <description>Infer orthology relationships</description>
3 <macros> <!-- Pulls in macros.xml; presumably the source of the TOOL_VERSION / VERSION_SUFFIX tokens and of the "requirements" and "citation" macros expanded in this file (confirm in macros.xml) -->
4 <import>macros.xml</import>
5 </macros>
6 <xrefs> <!-- Cross-reference to the bio.tools entry "fastoma" -->
7 <xref type="bio.tools">fastoma</xref>
8 </xrefs>
9 <requirements> <!-- The dependency list is supplied by the "requirements" macro -->
10 <expand macro="requirements" />
11 </requirements>
12 <command detect_errors="exit_code"><![CDATA[
13 #import re
14 mkdir -p input/proteome &&
15 ## Link every proteome into input/proteome/ under its element identifier, with each character that is not a word character, "-", "_" or "." replaced by "_".
16 #for $proteome in $proteomes:
17 #set $safename = re.sub('[^\w\-_\.]', '_', $proteome.element_identifier)
18 ln -s '$proteome' 'input/proteome/${safename}' &&
19 #end for
20 ln -s '$species_tree' input/species_tree.nwk
21 &&
22 printf '%s\n' \
23 'process {' \
24 ' resourceLimits = [' \
25 " cpus: \${GALAXY_SLOTS:-1}," \
26 " memory: \${GALAXY_MEMORY_MB:-8192}.MB," \
27 ' time: 72.h' \
28 ' ]' \
29 '}' > galaxy.conf
30 &&
31 ## Launch the FastOMA workflow with the resource limits written to galaxy.conf above; cpus falls back to 1 and memory to 8192 MB when GALAXY_SLOTS / GALAXY_MEMORY_MB are not set in the job environment.
32 nextflow run dessimozlab/FastOMA -r v@TOOL_VERSION@ -latest
33 -c galaxy.conf
34 -ansi-log false
35 --input input
36 --output_folder results
37 --omamer_db '$database.fields.path'
38 #if $report
39 --report
40 #end if
41 #if $force_pairwise_ortholog_generation
42 --force_pairwise_ortholog_generation
43 #end if
44 ]]></command>
45 <inputs>
46 <param name="proteomes" type="data" multiple="true" format="fasta" label="Protein sequences of proteome"/> <!-- Each selected dataset is symlinked into input/proteome/ by the command template -->
47 <param name="species_tree" type="data" format="newick" label="Species tree in newick format" help="The species tree must contain all species present in the proteome files"/> <!-- Symlinked as input/species_tree.nwk by the command template -->
48 <param name="database" label="OMAmer database" type="select">
49 <options from_data_table="omamer"/> <!-- Choices come from the "omamer" data table; the command template reads the "path" field of the selected entry -->
50 </param>
51 <param argument="--report" type="boolean" truevalue="--report" falsevalue="" label="Generate report" help="create a report on FastOMA run"/> <!-- Also used as the filter for the report_html and report_ipynb outputs -->
52 <param argument="--force_pairwise_ortholog_generation" type="boolean" truevalue="--force_pairwise_ortholog_generation" falsevalue="" label="Generate pairwise orthologs" help="Force generation of pairwise orthologs even if large number of species are provided (may take long time)"/> <!-- Also used as the filter for the PairwiseOrthologs output -->
53 </inputs>
54 <outputs> <!-- Every output is collected from results/, the folder the command template passes to FastOMA as its output folder -->
55 <data format="xml" name="FastOMA_HOGs" label="Hierarchical Orthologous groups (HOGs) by ${tool.name} on ${on_string}" from_work_dir="results/FastOMA_HOGs.orthoxml" />
56 <data format="html" name="report_html" label="${tool.name} on ${on_string} Report (HTML)" from_work_dir="results/report.html"><filter>report</filter></data> <!-- Only created when the report option is enabled -->
57 <data format="ipynb" name="report_ipynb" label="${tool.name} on ${on_string} Report (Notebook)" from_work_dir="results/report.ipynb"><filter>report</filter></data> <!-- Only created when the report option is enabled -->
58 <collection type="list" name="RootHOGsFasta" label="${tool.name} on ${on_string} fasta files per root-level HOGs"> <!-- One element per *.fa.gz file in results/RootHOGsFasta/; the element name is the file name without the .fa.gz suffix -->
59 <discover_datasets pattern="(?P&lt;name&gt;.+)\.fa\.gz" directory="results/RootHOGsFasta/" format="fasta.gz" visible="true"/> <!-- Declared as fasta.gz because the collected files are gzip-compressed (.fa.gz) -->
60 </collection>
61 <data format="tabular" name="RootHOGsTSV" label="${tool.name} on ${on_string} protein assignments to RootHOGs" from_work_dir="results/RootHOGs.tsv" />
62 <data format="tabular" name="OrthologousGroupsTSV" label="Single copy orthologous groups by ${tool.name} on ${on_string}" from_work_dir="results/OrthologousGroups.tsv" />
63 <collection type="list" name="OrthologousGroupsFasta" label="Single copy orthologous groups fasta files by ${tool.name} on ${on_string}"> <!-- One element per *.fa.gz file in results/OrthologousGroupsFasta/ -->
64 <discover_datasets pattern="(?P&lt;name&gt;.+)\.fa\.gz" directory="results/OrthologousGroupsFasta/" format="fasta.gz" visible="true"/> <!-- Declared as fasta.gz because the collected files are gzip-compressed (.fa.gz) -->
65 </collection>
66 <data format="tabular" name="PairwiseOrthologs" label="Pairwise orthologs by ${tool.name} on ${on_string}" from_work_dir="results/orthologs.tsv.gz"><filter>force_pairwise_ortholog_generation</filter></data> <!-- NOTE(review): the source file is orthologs.tsv.gz and the test reads it with decompress="true", yet the declared format is plain tabular; confirm whether a compressed datatype should be declared here -->
67 <data format="newick" name="species_tree_checked" label="Checked species tree by ${tool.name} on ${on_string}" from_work_dir="results/species_tree_checked.nwk" />
68 </outputs>
69 <tests>
70 <test expect_num_outputs="9"> <!-- 9 = all seven data outputs plus both collections, because the two optional flags are switched on below -->
71 <param name="proteomes" value="test-proteomes/AQUAE.fa,test-proteomes/CHLTR.fa,test-proteomes/MYCGE.fa"/>
72 <param name="species_tree" value="species_tree.nwk" ftype="newick"/>
73 <param name="database" value="test"/> <!-- Presumably an entry of the test "omamer" data table; confirm against the test data table configuration -->
74 <param name="report" value="true"/>
75 <param name="force_pairwise_ortholog_generation" value="true"/>
76 <output name="FastOMA_HOGs">
77 <assert_contents>
78 <is_valid_xml />
79 <has_text_matching expression="&lt;orthoXML .* origin=.FastOMA @TOOL_VERSION@" />
80 <has_text_matching expression="&lt;species " n="3" /> <!-- One species element per input proteome (three are supplied above) -->
81 <has_size size="7053" delta="300" />
82 </assert_contents>
83 </output>
84 <output_collection name="RootHOGsFasta" type="list" count="12">
85 <element name="HOG0000001" decompress="true"> <!-- Collected from a .fa.gz file, so it is decompressed before the assertion runs -->
86 <assert_contents>
87 <has_text_matching expression="^>" />
88 </assert_contents>
89 </element>
90 </output_collection>
91 <output_collection name="OrthologousGroupsFasta" type="list" count="12">
92 <element name="OG_0000001" decompress="true"> <!-- Collected from a .fa.gz file, so it is decompressed before the assertion runs -->
93 <assert_contents>
94 <has_text_matching expression="^>" />
95 </assert_contents>
96 </element>
97 </output_collection>
98 <output name="PairwiseOrthologs" decompress="true"> <!-- Taken from results/orthologs.tsv.gz, so it is decompressed before the assertions run -->
99 <assert_contents>
100 <has_line_matching expression="sp\|O66778\|ENO_AQUAE\tsp\|O84591\|ENO_CHLTR" />
101 <has_n_columns n="2" />
102 </assert_contents>
103 </output>
104 <output name="species_tree_checked" file="expected-species-tree.nwk" ftype="newick"/>
105 <output name="RootHOGsTSV">
106 <assert_contents>
107 <has_line_matching expression="HOG:\d+\tsp\|O66429\|EFTU_AQUAE\tHOG:\d+" />
108 <has_n_columns n="3" />
109 </assert_contents>
110 </output>
111 <output name="OrthologousGroupsTSV">
112 <assert_contents>
113 <has_line_matching expression="OG_\d+\tsp\|O84332\|TPIS_CHLTR" />
114 <has_n_columns n="2" />
115 </assert_contents>
116 </output>
117 <output name="report_ipynb">
118 <assert_contents>
119 <has_text text="Number of RootHOGs: 12" /> <!-- Same figure as the count="12" asserted on the RootHOGsFasta collection -->
120 <has_text text="There are 2 taxonomic levels in the input species tree with 3 species as leaves." />
121 <has_json_property_with_value property="execution_count" value="25" />
122 </assert_contents>
123 </output>
124 <output name="report_html">
125 <assert_contents>
126 <has_text text="Number of RootHOGs: 12" />
127 <has_text text="There are 2 taxonomic levels in the input species tree with 3 species as leaves." />
128 </assert_contents>
129 </output>
130 </test>
131 </tests>
132 <help><![CDATA[
133 FastOMA is a software tool to infer orthology relationships among multiple species based on their proteomes.
134
135 The main output of FastOMA are Hierarchical Orthologous Groups (HOGs) in OrthoXML format, which represent
136 groups of genes that have evolved from a common ancestor. These groups are hierarchically nested,
137 reflecting the evolutionary relationships among the species.
138
139 .. class:: warningmark
140
141 The Galaxy tool of FastOMA is not intended for large-scale analysis. All steps are run on a single machine,
142 which usually is not suitable for large datasets. For large-scale analysis, please use FastOMA directly
143 through Nextflow as described in the documentation: https://github.com/dessimozlab/FastOMA
144
145 **Input data**
146
147 - Protein sequences of proteomes: Provide the protein sequences of the species in FASTA format. Each proteome should be in a separate file.
148
149 - Species tree in Newick format: Provide a species tree that includes all species present in the proteome files.
150
151 - OMAmer database: Select an OMAmer database from the available options. Usually it is beneficial to use the most comprehensive database available (e.g. LUCA).
152
153 **Outputs**
154
155 - *Hierarchical Orthologous groups (HOGs) by FastOMA*: The main output file in OrthoXML format containing the inferred HOGs.
156
157 - *FastOMA Report*: A Jupyter notebook and an HTML report summarizing the results of the FastOMA run.
158
159 - *FastOMA fasta files per root-level HOGs*: TSV and FASTA files listing the sequences for each root-level HOG.
160
161 - *Single copy orthologous groups*: TSV and FASTA files listing orthologous groups with at most one protein per species across the provided proteomes. This data is mostly useful for species tree reconstruction.
162
163 - *Pairwise orthologs*: TSV file listing all pairwise orthologs inferred by FastOMA.
164
165 - *Checked species tree*: The species tree after checking and reformatting by FastOMA. Internal nodes will also be labeled if they were not labeled in the input tree. Those labels are used to name the internal HOG levels.
166
167 ]]></help>
168 <expand macro="citation" />
169 </tool>