comparison macros.xml @ 0:881f2faa5fe7 draft

planemo upload for repository https://github.com/picrust/picrust2 commit 972784d909912af20cd213fc56830fee79d83ca6
author iuc
date Sat, 04 Mar 2023 20:28:14 +0000
parents
children 677f3c8db525
comparison
equal deleted inserted replaced
-1:000000000000 0:881f2faa5fe7
1 <?xml version="1.0"?>
2 <macros>
<!-- Version tokens. TOOL_VERSION is the picrust2 package version requested by the
     "requirements" macro; VERSION_SUFFIX and PROFILE are presumably consumed by the
     tool wrappers importing this file (TODO confirm). -->
3 <token name="@TOOL_VERSION@">2.5.1</token>
4 <token name="@VERSION_SUFFIX@">0</token>
5 <token name="@PROFILE@">22.01</token>
<!-- bio_tool: expands to an xrefs element pointing at the bio.tools entry "picrust2". -->
6 <xml name="bio_tool">
7 <xrefs>
8 <xref type="bio.tools">picrust2</xref>
9 </xrefs>
10 </xml>
<!-- requirements: the picrust2 package at the version given by the TOOL_VERSION token;
     the unnamed yield lets a caller append further requirement elements. -->
11 <xml name="requirements">
12 <requirements>
13 <requirement type="package" version="@TOOL_VERSION@">picrust2</requirement>
14 <yield/>
15 </requirements>
16 </xml>
<!-- HELP_HEADER: shared introductory help text (a "What it does" heading, a short
     description of PICRUSt2 and a link to its wiki). -->
17 <token name="@HELP_HEADER@">
18 What it does
19 ============
20
21 PICRUSt2 (Phylogenetic Investigation of Communities by Reconstruction of
22 Unobserved States) is a tool for predicting functional abundances based only on
23 marker gene sequences.
24
25 Read more about the tool: https://github.com/picrust/picrust2/wiki
26 </token>
<!-- citations: a single DOI citation (10.1038/s41587-020-0548-6). -->
27 <xml name="citations">
28 <citations>
29 <citation type="doi">10.1038/s41587-020-0548-6</citation>
30 </citations>
31 </xml>
32
33
34
35 <token name="@VAR_ACCESS_FOO@"><![CDATA[
36 ## in picrust2_pipeline the parameters are within a section or a
37 ## conditional; where these macros are used on their own they are not.
38 ## this function allows unified access to both layouts
##
## getVarCond($sec_cond, $var)
##   $sec_cond  name of the enclosing section/conditional used as fallback prefix
##   $var       parameter name, may be dotted (e.g. "ref_dir.selector")
## returns the value of $var if a variable of that name exists, otherwise the
## value of "$sec_cond.$var" if that exists, otherwise nothing (bare return)
39 #def getVarCond($sec_cond, $var)
40 #if $varExists($var)
41 #return $getVar($var)
42 #else if $varExists($sec_cond + "." + $var)
43 #return $getVar($sec_cond + "." + $var)
44 #else
45 #return
46 #end if
47 #end def
48 ]]></token>
49
50 <!-- macros for place_seqs -->
51
52 <token name="@PLACE_SEQS_PREPROCESSING@"><![CDATA[
53 ## determine the picrust2 package dir, which is something like .../lib/python3.8/site-packages/picrust2
## (the bundled reference data lives in its default_files/ subdirectory)
54 PROJECT_DIR=\$(python -c 'from picrust2 import default; print(default.project_dir)') &&
55 REF_DIR_BASE=\$PROJECT_DIR"/default_files/" &&
## for custom reference data: link the user supplied files into custom/ under
## the common base name "custom"; the extension of the model file depends on
## the selected placement tool
56 #if $getVarCond("place_seqs_section", "ref_dir.selector") == "custom"
57 mkdir -p custom/ &&
58 ln -s '$getVarCond("place_seqs_section", "ref_dir.custom_fna")' custom/custom.fna &&
59 ln -s '$getVarCond("place_seqs_section", "ref_dir.custom_hmm")' custom/custom.hmm &&
60 #if $getVarCond("place_seqs_section", "placement_tool") == "epa-ng"
61 ln -s '$getVarCond("place_seqs_section", "ref_dir.custom_model")' custom/custom.model &&
62 #else if $getVarCond("place_seqs_section", "placement_tool") == "sepp"
63 ln -s '$getVarCond("place_seqs_section", "ref_dir.custom_model")' custom/custom.raxml_info &&
64 #end if
65 ln -s '$getVarCond("place_seqs_section", "ref_dir.custom_tre")' custom/custom.tre &&
66 #end if
67 ]]></token>
68 <token name="@PLACE_SEQS_PARAMS@"><![CDATA[
## command line options for sequence placement; every value is looked up via
## getVarCond so that top-level and place_seqs_section-nested parameters both work
69 --study_fasta '$getVarCond("place_seqs_section", "study_fasta")'
70 --placement_tool '$getVarCond("place_seqs_section", "placement_tool")'
71 ## only set --ref_dir for non-default reference data (default is
72 ## prokaryotic): even if the default were passed explicitly as
73 ## `"\$REF_DIR_BASE"$ref_dir.selector`, picrust2 will complain about
74 ## non-default reference files specified with default pathway mapfile
75 #if $getVarCond("place_seqs_section", "ref_dir.selector") == "custom"
76 --ref_dir custom/
77 #else if $getVarCond("place_seqs_section", "ref_dir.selector") != "prokaryotic/pro_ref/"
78 --ref_dir "\$REF_DIR_BASE"$getVarCond("place_seqs_section", "ref_dir.selector")
79 #end if
80 --min_align $getVarCond("place_seqs_section", "min_align")
81 ]]></token>
<!-- place_seqs_params: inputs for sequence placement - study FASTA, placement tool,
     reference data and minimum alignment proportion. The non-custom values of
     ref_dir.selector are relative paths that the command template appends to
     REF_DIR_BASE; "custom" asks for the four reference files instead. -->
82 <xml name="place_seqs_params">
83 <param argument="--study_fasta" type="data" format="fasta" label="Study sequences" help="Sequences of the representative OTUs and/or ASVs. Sequences need to be on the positive strand and the headerline should be only one field, i.e. no additional whitespace-delimited fields"/>
84 <param argument="--placement_tool" type="select" label="Placement tool" help="Used for placing sequences into reference tree">
85 <option value="epa-ng" selected="true">EPA-ng - Fast, parallel, highly accurate Maximum Likelihood Phylogenetic Placement, by the team behind RAxML(-ng)</option>
86 <option value="sepp">SEPP - SATe-enabled Phylogenetic Placement</option>
87 </param>
88 <conditional name="ref_dir">
89 <param name="selector" type="select" label="Reference data" help="Used for sequence placement">
90 <option value="prokaryotic/pro_ref/" selected="true">Prokaryotic 16S rRNA gene</option>
91 <!-- TODO https://github.com/picrust/picrust2/issues/276 -->
92 <option value="fungi/fungi_ITS/">Fungal ITS (only for epa-ng)</option>
93 <option value="fungi/fungi_18S/">Fungal 18S (only for epa-ng)</option>
94 <option value="custom">Custom reference sequence files</option>
95 </param>
96 <when value="prokaryotic/pro_ref/"/>
97 <when value="fungi/fungi_ITS/"/>
98 <when value="fungi/fungi_18S/"/>
99 <when value="custom">
100 <param name="custom_fna" type="data" format="fasta" label="Multiple-sequence alignment of reference sequences"/>
101 <param name="custom_hmm" type="data" format="hmm2,hmm3" label="Hidden-markov model of the multiple-sequence alignment" help="The HMM of the alignment can be created using hmmbuild"/>
102 <param name="custom_tre" type="data" format="newick" label="Tree of the reference sequences"/>
103 <param name="custom_model" type="data" format="txt" label="Modelfile" help="For epa-ng: output by RaXmL specifying the best parameters for the tree, for sepp see examples in PICRUSt2 repository"/>
104 </when>
105 </conditional>
106 <param argument="--min_align" type="float" value="0.80" min="0.0" max="1.0" label="Minimum alignment length" help="Proportion of the total length of an input query sequence that must align with reference sequences. Sequences with lengths below this value will be excluded from the placement and all subsequent steps"/>
107 </xml>
<!-- place_seqs_output: the placement tree (out.tre) plus a list collection of all files
     discovered in intermediate/place_seqs/, both below the directory given by the
     from_work_dir token. The label_suffix token (default empty) is appended to the
     labels; the unnamed yield lets callers add elements to the collection. -->
108 <xml name="place_seqs_output" tokens="from_work_dir" token_label_suffix="">
109 <data name="out_tree" format="newick" from_work_dir="@FROM_WORK_DIR@/out.tre" label="${tool.name} on ${on_string}: Tree of reference and study 16S sequences @LABEL_SUFFIX@"/>
110 <collection name="place_seqs_intermediate_output" type="list" label="${tool.name} on ${on_string}: Intermediate files @LABEL_SUFFIX@" >
111 <discover_datasets pattern="__name_and_ext__" directory="@FROM_WORK_DIR@/intermediate/place_seqs/"/>
112 <yield/>
113 </collection>
114 </xml>
115
116 <!-- parameters of hsp -->
117 <token name="@HSP_PARAMS@"><![CDATA[
## command line options for hidden-state prediction, shared by hsp and
## picrust2_pipeline. Where the two use different option names the variant is
## chosen by checking which parameter exists (top-level vs. inside hsp_section);
## if neither exists templating is aborted with an exception.
118 ## trait table(s): hsp and picrust2_pipeline
119 #if $getVarCond("hsp_section", "trait_input.selector") == "default"
120 #if $varExists('trait_input.in_trait')
121 --in_trait '$trait_input.in_trait'
122 #else if $varExists('hsp_section.trait_input.in_traits')
123 --in_traits '$hsp_section.trait_input.in_traits'
124 #else
125 #raise Exception("wrapper must define in_trait / in_traits")
126 #end if
127 #else if $getVarCond("hsp_section", "trait_input.selector") == "custom"
128 #if $varExists('trait_input.observed_trait_table')
129 --observed_trait_table '$trait_input.observed_trait_table'
130 #else if $varExists('hsp_section.trait_input.custom_trait_tables')
131 --custom_trait_tables '$hsp_section.trait_input.custom_trait_tables'
132 --marker_gene_table '$hsp_section.trait_input.marker_gene_table'
133 #else
134 #raise Exception("wrapper must define observed_trait_table / (custom_trait_tables + marker_gene_table)")
135 #end if
136 #end if
137
138
139 --hsp_method '$getVarCond("hsp_section", "hsp_method_options.hsp_method")'
140 #if $getVarCond("hsp_section", "hsp_method_options.hsp_method") == "mp"
141 --edge_exponent $getVarCond("hsp_section", "hsp_method_options.edge_exponent")
142 #else if $getVarCond("hsp_section", "hsp_method_options.hsp_method") == "emp_prob"
143 ## special treatment of seed (option absent in picrust2_pipeline):
## only emitted if the wrapper defines a top-level hsp_method_options.seed
144 #if $varExists('hsp_method_options.seed')
145 --seed $hsp_method_options.seed
146 #end if
147 #end if
148 ## hsp and picrust2_pipeline use different CLI params to toggle NSTI computation
149 #if $varExists('calculate_NSTI')
150 $calculate_NSTI
151 #else if $varExists('hsp_section.skip_nsti')
152 $hsp_section.skip_nsti
153 #else
154 #raise Exception("wrapper must define calculate_NSTI / skip_nsti")
155 #end if
156 ]]></token>
157 <!-- - one of nsti_[true,false]value must be given: CLI param
158 differs between hsp and picrust2_pipeline
159 - nsti_checked must be set accordingly to true or false
160
161 furthermore, three yields can be used (2 named & 1 unnamed)
162 - the unnamed is used to add the seed param for hsp (for \-\-hsp_method emp_prob)
163 - the named yield `add_default_traits` is used to add two default trait tables for hsp
164 - the named yield `custom_traits` is used for the different parameters
165 to specify custom trait tables in hsp (observed_trait_table) and
166 picrust2_pipeline (custom_trait_tables, marker_gene_table)
167 -->
<!-- hsp_params: trait table selection, hidden-state prediction method and NSTI toggle.
     The in_trait_arg, in_trait_multiple, in_trait_label_suff and in_traits_help tokens
     configure the select of pre-calculated trait tables; the nsti_truevalue,
     nsti_falsevalue and nsti_checked tokens configure the NSTI boolean, whose argument
     is the concatenation of the true and false value tokens. -->
168 <xml name="hsp_params" tokens="nsti_checked,in_trait_arg,in_trait_multiple,in_trait_label_suff" token_nsti_truevalue="" token_nsti_falsevalue="" token_in_traits_help="">
169 <conditional name="trait_input">
170 <param name="selector" type="select" label="Trait table@IN_TRAIT_LABEL_SUFF@" help="i.e. which gene families to predict">
171 <option value="default" selected="true">Default trait table@IN_TRAIT_LABEL_SUFF@</option>
172 <option value="custom">Customized trait table@IN_TRAIT_LABEL_SUFF@</option>
173 </param>
174 <when value="default">
175 <param argument="@IN_TRAIT_ARG@" type="select" multiple="@IN_TRAIT_MULTIPLE@" optional="false" label="Pre-calculated trait table@IN_TRAIT_LABEL_SUFF@" help="@IN_TRAITS_HELP@">
176 <option value="COG">Clusters of Orthologous Genes database (COG)</option>
177 <option value="EC" selected="true">Enzyme Commission number database (EC number)</option>
178 <option value="KO" selected="true">KEGG Orthology database (KO)</option>
179 <option value="PFAM">Pfam database</option>
180 <option value="TIGRFAM">TIGRFAM database</option>
181 <yield name="add_default_traits"/>
182 </param>
183 </when>
184 <when value="custom">
185 <yield name="custom_traits"/>
186 </when>
187 </conditional>
188 <conditional name="hsp_method_options">
189 <param argument="--hsp_method" type="select" label="Hidden-state prediction method">
190 <option value="mp" selected="true">Predict discrete traits by: Maximum parsimony (mp)</option>
191 <option value="emp_prob">Predict discrete traits by: Empirical state probabilities across tips (emp_prob)</option>
192 <option value="subtree_average">Predict continuous traits by: Subtree averaging (subtree_average)</option>
193 <option value="pic">Predict continuous traits by: phylogentic independent contrast (pic)</option>
194 <option value="scp">Reconstruct continuous traits by: squared-change parsimony (scp)</option>
195 </param>
196 <when value="mp">
197 <param argument="--edge_exponent" type="float" value="0.5" min="0.0" label="Transition cost weight" help="Specifies weighting transition costs by the inverse length of edge lengths. If 0, then edge lengths do not influence predictions"/>
198 </when>
199 <when value="emp_prob">
200 <yield/>
201 </when>
202 <when value="subtree_average"/>
203 <when value="pic"/>
204 <when value="scp"/>
205 </conditional>
206 <param argument="@NSTI_TRUEVALUE@@NSTI_FALSEVALUE@" type="boolean" truevalue="@NSTI_TRUEVALUE@" falsevalue="@NSTI_FALSEVALUE@" checked="@NSTI_CHECKED@" label="Calculate NSTI and add to output file" help="And add to output file"/>
207 </xml>
208
209 <!-- parameters of the metagenome_pipeline -->
210
211 <token name="@PREPARE_METAGENOME_PIPELINE_PARAMS@"><![CDATA[
## link the abundance table into the working directory as input.<ext>, where
## <ext> is derived from the Galaxy datatype of the dataset. $ext stays set for
## the rest of the command template, which passes --input 'input.$ext'.
## the dataset is resolved via getVarCond, so it may be a top-level parameter
## or nested in metagenome_pipeline_section
212 #set $_input=$getVarCond("metagenome_pipeline_section", "input")
213 #if $_input.ext == "mothur.shared"
214 #set ext="msf"
215 #else if $_input.ext == "tabular"
216 #set ext="tsv"
217 #else if $_input.ext.startswith('biom')
218 #set ext="biom"
219 #else
220 ## ext is undefined here, so no valid command can be built: abort templating
221 #raise Exception("unknown extension %s" % $_input.ext)
222 #end if
223 ln -s '$_input' 'input.$ext' &&
224 ]]></token>
225 <token name="@METAGENOME_PIPELINE_PARAMS@"><![CDATA[
## command line options of the metagenome pipeline step. Relies on $ext having
## been set earlier in the same command template (table linked as input.$ext).
226 --input 'input.$ext'
## read/sample cut-offs exist only for ASV tables
227 #if $getVarCond("metagenome_pipeline_section", "input_options.selector") == "ASV"
228 --min_reads $getVarCond("metagenome_pipeline_section", "input_options.min_reads")
229 --min_samples $getVarCond("metagenome_pipeline_section", "input_options.min_samples")
230 #end if
## the selector value is either empty or the stratification flag itself;
## wide_table only exists when stratified output was requested
231 $getVarCond("metagenome_pipeline_section", "stratified_output.selector")
232 #if $getVarCond("metagenome_pipeline_section", "stratified_output.selector") != ''
233 $getVarCond("metagenome_pipeline_section", "stratified_output.wide_table")
234 #end if
235 $getVarCond("metagenome_pipeline_section", "skip_norm")
236 --max_nsti $getVarCond("metagenome_pipeline_section", "max_nsti")
237 ]]></token>
<!-- metagenome_pipeline_params: abundance table input, table type (OTU/ASV, with read and
     sample cut-offs for ASV only), an unnamed yield for caller supplied parameters, the
     maximum NSTI, optional stratified output and the skip_norm switch. The
     stratified_arg token supplies the CLI flag used as value of the "Yes" option. -->
238 <xml name="metagenome_pipeline_params" tokens="stratified_arg">
239 <param argument="--input" type="data" format="tabular,biom1,biom2,mothur.shared" label="Sequence abundance table (OTUs or ASVs)" help="The sequence abundances should be in read counts and not relative abundances. The tool will normalize the input sequence abundance table by the predicted number of marker genes"/>
240 <conditional name="input_options">
241 <param name="selector" type="select" label="Sequence abundance table type">
242 <option value="OTU" selected="true">Operational Taxonomic Units (OTU)</option>
243 <option value="ASV">Amplicon Sequence Variants (ASV)</option>
244 </param>
245 <when value="OTU">
246 </when>
247 <when value="ASV">
248 <param argument="--min_reads" type="integer" min="1" value="1" label="Minimum number of reads across all samples for each input ASV" help="ASVs below this cut-off will be counted as part of the RARE category in the stratified output"/>
249 <param argument="--min_samples" type="integer" min="1" value="1" label="Minimum number of samples that an ASV needs to be identfied within" help="ASVs below this cut-off will be counted as part of the RARE category in the stratified output"/>
250 </when>
251 </conditional>
252 <yield/>
253 <param argument="--max_nsti" type="float" min="0" value="2.0" label="Maximum Nearest-sequenced taxon index (NSTI)" help="Sequences with larger values will be excluded"/>
254 <conditional name="stratified_output">
255 <param argument="@STRATIFIED_ARG@" name="selector" type="select" label="Generate an output table stratified by sequences">
256 <option value="" selected="true">No</option>
257 <option value="@STRATIFIED_ARG@">Yes [will increase run-time]</option>
258 </param>
259 <when value=""/>
260 <when value="@STRATIFIED_ARG@">
261 <param argument="--wide_table" type="boolean" truevalue="--wide_table" falsevalue="" checked="false" label="Output wide-format stratified table of metagenome predictions" help="This is the deprecated method of generating stratified tables since it is extremely memory intensive"/>
262 </when>
263 </conditional>
264 <param argument="--skip_norm" type="boolean" truevalue="--skip_norm" falsevalue="" checked="false" label="Skip normalizing sequence abundances by predicted marker gene copy numbers"/>
265 </xml>
266
267 <!-- pathway_pipeline macros-->
268 <token name="@PATHWAY_PIPELINE_PARAMS@"><![CDATA[
## command line options for pathway inference, shared by pathway_pipeline
## (top-level parameters) and picrust2_pipeline (nested in predict_pathways).
## most parameters carry their CLI flag as value, hence are emitted verbatim.
269 ## in pathway_pipeline it's --map while in picrust2_pipeline it's --pathway_map
270 #if $varExists('map') and $map
271 --map '$map'
272 #else if $varExists('predict_pathways.pathway_map') and $predict_pathways.pathway_map
273 --pathway_map '$predict_pathways.pathway_map'
274 #end if
275 $getVarCond("predict_pathways", "skip_minpath")
276 $getVarCond("predict_pathways", "no_gap_fill")
277 $getVarCond("predict_pathways", "regrouping.no_regroup")
## a custom regroup map is optional and only offered when regrouping is enabled;
## it lives inside the regrouping conditional, so use the same path as in the check
278 #if $getVarCond("predict_pathways", "regrouping.no_regroup") == '' and $getVarCond("predict_pathways", "regrouping.regroup_map")
279 --regroup_map '$getVarCond("predict_pathways", "regrouping.regroup_map")'
280 #end if
281 $getVarCond("predict_pathways", "strat_output.per_sequence_contrib")
## the per-sequence inputs only exist when per_sequence_contrib was selected
282 #if $getVarCond("predict_pathways", "strat_output.per_sequence_contrib") != ""
283 --per_sequence_function '$getVarCond("predict_pathways", "strat_output.per_sequence_function")'
284 --per_sequence_abun '$getVarCond("predict_pathways", "strat_output.per_sequence_abun")'
285 $getVarCond("predict_pathways", "strat_output.wide_table")
286 #end if
287 $getVarCond("predict_pathways", "coverage")
288 ]]></token>
<!-- pathway_pipeline_params: optional custom pathway map (its argument name is given by
     the mapargument token), MinPath and gap filling switches, regrouping, per sequence
     stratification and coverage. Note that skip_minpath and no_gap_fill are inverted
     booleans: checked (the default) emits nothing, unchecked emits the flag. -->
289 <xml name="pathway_pipeline_params" tokens="mapargument">
290 <param argument="@MAPARGUMENT@" type="data" format="txt,tabular" optional="true" label="Customized table mapping of pathways to reactions" help="Default mapping file is Maps MetaCyc reactions to prokaryotic MetaCyc pathways"/>
291 <param argument="--skip_minpath" type="boolean" truevalue="" falsevalue="--skip_minpath" checked="true" label="Run MinPath to identify which pathways are present as a first pass"/>
292 <param argument="--no_gap_fill" type="boolean" truevalue="" falsevalue="--no_gap_fill" checked="true" label="Perform gap filling before predicting pathway abundances"/>
293 <conditional name="regrouping">
294 <param argument="--no_regroup" type="select" label="Regroup input gene families to reactions">
295 <option value="">Yes</option>
296 <option value="--no_regroup">No</option>
297 </param>
298 <when value="">
299 <param argument="--regroup_map" type="data" format="tabular" optional="true" label="Mapfile of ids to regroup gene families to before running MinPath" help="Keep empty to use the default mapping file (ec_level4_to_metacyc_rxn.tsv contained in PICRUSt2)"/>
300 </when>
301 <when value="--no_regroup"/>
302 </conditional>
303 <conditional name="strat_output">
304 <param argument="--per_sequence_contrib" type="select" label="Calculate pathway abundances for each individual predicted genome" help="The output will be the predicted pathway abundance contributed by each individual sequence. This is in contrast to the default stratified output, which is the contribution to the community-wide pathway abundances. Note this will greatly increase the runtime. Experimental pathway coverage stratified by contributing sequence will also be output when --coverage is set">
305 <option value="--per_sequence_contrib">Yes</option>
306 <option value="" selected="true">No</option>
307 </param>
308 <when value="--per_sequence_contrib">
309 <param argument="--per_sequence_abun" type="data" format="tabular" label="Table of sequence abundances across samples normalized by marker copy number" help="Typically the normalized sequence abundance table output at the metagenome pipeline step. This input is required when the per sequence contrib option is set"/>
310 <param argument="--per_sequence_function" type="data" format="tabular" label="Table of function abundances per sequence, which was outputted at the hidden-state prediction step" help="This input is required when the per sequence contrib option is set. Note that this file should be the same input table as used for the metagenome pipeline step"/>
311 <!-- TODO maybe deprecate .. because complicated anyway as its used in metagenome_pipeline as well and help says deprecated as well -->
312 <param argument="--wide_table" type="boolean" truevalue="--wide_table" falsevalue="" checked="false" label="Output wide-format stratified table (DEPRECATED)" help="Instead of the metagenome contribution table. This is the deprecated method of generating
313 stratified tables since it is extremely memory intensive"/>
314 </when>
315 <when value=""/>
316 </conditional>
317 <param argument="--coverage" type="boolean" truevalue="--coverage" falsevalue="" checked="false" label="Calculate pathway coverages as well as abundances" help="Experimental and only useful for advanced users"/>
318 </xml>
<!-- pathways_output: outputs of the pathway step, all taken from below the directory given
     by the from_work_dir token: the unstratified pathway abundances, a list collection of
     the files in intermediate/pathways/, and the coverage, per sequence, prediction and
     contribution tables. Callers can inject elements (e.g. filters) through the yields:
     the unnamed yield is expanded in every data element, intermediate_filter in the
     collection, coverage_filter in the coverage table and per_sequence_filter in the
     last three tables. -->
319 <xml name="pathways_output" tokens="from_work_dir" token_label_suffix="">
320 <data name="pathways_output" format="tabular" from_work_dir="@FROM_WORK_DIR@/pathways_out/path_abun_unstrat.tabular" label="${tool.name} on ${on_string}: Pathway abundances">
321 <yield/>
322 </data>
323 <collection name="pathways_intermediate_output" type="list" label="${tool.name} on ${on_string}: Intermediate files @LABEL_SUFFIX@" >
324 <discover_datasets pattern="__name_and_ext__" directory="@FROM_WORK_DIR@/intermediate/pathways/" format="tabular"/>
325 <yield name="intermediate_filter"/>
326 </collection>
327 <data format="tabular" name="path_cov_unstrat" from_work_dir="@FROM_WORK_DIR@/pathways_out/path_cov_unstrat.tabular" label="${tool.name} on ${on_string}: Pathway coverage @LABEL_SUFFIX@" >
328 <yield/>
329 <yield name="coverage_filter"/>
330 </data>
331 <data format="tabular" name="path_abun_unstrat_per_seq" from_work_dir="@FROM_WORK_DIR@/pathways_out/path_abun_unstrat_per_seq.tabular" label="${tool.name} on ${on_string}: Pathway abundance unstratified per sequence @LABEL_SUFFIX@" >
332 <yield/>
333 <yield name="per_sequence_filter"/>
334 </data>
335 <data format="tabular" name="path_abun_predictions" from_work_dir="@FROM_WORK_DIR@/pathways_out/path_abun_predictions.tabular" label="${tool.name} on ${on_string}: Pathway abundance predictions @LABEL_SUFFIX@" >
336 <yield/>
337 <yield name="per_sequence_filter"/>
338 </data>
339 <data format="tabular" name="path_abun_contrib" from_work_dir="@FROM_WORK_DIR@/pathways_out/path_abun_contrib.tabular" label="${tool.name} on ${on_string}: Pathway abundance contributed @LABEL_SUFFIX@" >
340 <yield/>
341 <yield name="per_sequence_filter"/>
342 </data>
343 </xml>
344 </macros>