comparison macros.xml @ 0:07bf5268724f draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
author iuc
date Fri, 14 Oct 2022 21:45:54 +0000
parents
children 0ae1a2636de5
comparison
equal deleted inserted replaced
-1:000000000000 0:07bf5268724f
1 <?xml version="1.0"?>
2 <macros>
<!-- Shared version tokens.
     @TOOL_VERSION@ is the version pinned on the semibin package in the
     "requirements" macro of this file. @VERSION_SUFFIX@ and @PROFILE@ are
     not referenced in this file; presumably the tool XML files that import
     these macros use them for the wrapper revision and the Galaxy profile
     (TODO confirm against the tool files). -->
<token name="@TOOL_VERSION@">1.1.1</token>
<token name="@VERSION_SUFFIX@">0</token>
<token name="@PROFILE@">21.01</token>
<!-- Cross-reference block pointing at the bio.tools entry "semibin". -->
<xml name="biotools">
    <xrefs>
        <xref type="bio.tools">semibin</xref>
    </xrefs>
</xml>
<!-- Package requirements: the semibin package at @TOOL_VERSION@.
     The yield placeholder lets an expanding tool append further
     requirement elements after the semibin one. -->
<xml name="requirements">
    <requirements>
        <requirement version="@TOOL_VERSION@" type="package">semibin</requirement>
        <yield/>
    </requirements>
</xml>
<!-- Version command: runs SemiBin with the -v flag. -->
<xml name="version">
    <version_command>SemiBin -v</version_command>
</xml>
<!-- "mode" conditional asking for contigs AND read mappings.
     single: one contig file, one BAM
     co:     one contig file, several BAMs
     multi:  contigs via the input-fasta-multi macro, several BAMs -->
<xml name="mode_fasta_bam">
    <conditional name="mode">
        <expand macro="mode_select"/>
        <when value="single">
            <expand macro="input-fasta-single"/>
            <expand macro="input-bam-single"/>
        </when>
        <when value="co">
            <expand macro="input-fasta-single"/>
            <expand macro="input-bam-multi"/>
        </when>
        <when value="multi">
            <expand macro="input-fasta-multi"/>
            <expand macro="input-bam-multi"/>
        </when>
    </conditional>
</xml>
<!-- "mode" conditional asking for contigs only (no BAM inputs).
     single and co take one contig file; multi uses the
     input-fasta-multi macro. -->
<xml name="mode_fasta">
    <conditional name="mode">
        <expand macro="mode_select"/>
        <when value="single">
            <expand macro="input-fasta-single"/>
        </when>
        <when value="co">
            <expand macro="input-fasta-single"/>
        </when>
        <when value="multi">
            <expand macro="input-fasta-multi"/>
        </when>
    </conditional>
</xml>
<!-- Selector driving the "mode" conditionals: single (default), co or multi. -->
<xml name="mode_select">
    <param type="select" name="select" label="Binning mode">
        <option selected="true" value="single">Single sample binning (each sample is assembled and binned independently)</option>
        <option value="co">Co-assembly binning (samples are co-assembled together and binned together)</option>
        <option value="multi">Multi-sample binning (multiple samples are assembled and binned individually, but information from multiple samples is used together)</option>
    </param>
</xml>
<!-- Single contig dataset, plain or gzip-compressed FASTA. -->
<xml name="input-fasta-single">
    <param type="data" argument="--input-fasta" label="Contig sequences" format="fasta,fasta.gz"/>
</xml>
<!-- Contig input for multi-sample binning, wrapped in the "multi_fasta"
     conditional:
     concatenated: one already-combined contig file plus the separator used in it
     multi:        one contig file per sample plus a minimal contig length -->
<xml name="input-fasta-multi">
    <conditional name="multi_fasta">
        <param type="select" name="select" label="Contig files of the samples">
            <option selected="true" value="concatenated">1 concatenated file (created using the dedicated tool) with all sample contigs </option>
            <option value="multi">1 contig file per sample</option>
        </param>
        <when value="concatenated">
            <param type="data" argument="--input-fasta" label="Combined contig sequences" format="fasta,fasta.gz"/>
            <expand macro="separator"/>
        </when>
        <when value="multi">
            <param type="data" argument="--input-fasta" label="Contig sequences" format="fasta,fasta.gz" multiple="true"/>
            <expand macro="concat_min_len"/>
        </when>
    </conditional>
</xml>
<!-- Non-negative integer "min_len" (default 0): minimal contig length
     kept when per-sample contig files are concatenated. -->
<xml name="concat_min_len">
    <param type="integer" name="min_len" label="Minimal length for contigs to be kept" value="0" min="0"/>
</xml>
<!-- Command fragment exposing a top-level input_fasta dataset as
     'contigs.fasta' in the working directory: gzip-compressed inputs are
     decompressed into the file, everything else is symlinked. -->
<token name="@SINGLE_FASTA_FILES@"><![CDATA[
#if $input_fasta.ext.endswith(".gz")
    gunzip -c '$input_fasta' > 'contigs.fasta' &&
#else
    ln -s '$input_fasta' 'contigs.fasta' &&
#end if
]]></token>
<!-- Command fragment that always ends with a 'contigs.fasta' file in the
     working directory, whatever the "mode" conditional holds:
     * single / co: the one contig file is decompressed or symlinked.
     * multi + concatenated: same, and $separator is taken from the user input.
     * multi + one file per sample: each file is staged as
       '<sanitized element identifier>.fasta', $separator is set to ':',
       "SemiBin concatenate_fasta" merges them and its result
       (output/concatenated.fa) is symlinked to 'contigs.fasta'.
     $separator is only set in the two multi branches.
     NOTE(review): the fragment calls re.sub, so the enclosing command
     presumably imports the re module; confirm in the tool files. -->
<token name="@FASTA_FILES@"><![CDATA[
#if $mode.select == 'single' or $mode.select == 'co'
    #if $mode.input_fasta.ext.endswith(".gz")
        gunzip -c '$mode.input_fasta' > 'contigs.fasta' &&
    #else
        ln -s '$mode.input_fasta' 'contigs.fasta' &&
    #end if
#else if $mode.multi_fasta.select == 'concatenated'
    #if $mode.multi_fasta.input_fasta.ext.endswith(".gz")
        gunzip -c '$mode.multi_fasta.input_fasta' > 'contigs.fasta' &&
    #else
        ln -s '$mode.multi_fasta.input_fasta' 'contigs.fasta' &&
    #end if
    #set $separator = $mode.multi_fasta.separator
#else
    #for $e in $mode.multi_fasta.input_fasta
        #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($e.element_identifier))
        #if $e.ext.endswith(".gz")
            gunzip -c '$e' > '${identifier}.fasta' &&
        #else
            ln -s '$e' '${identifier}.fasta' &&
        #end if
    #end for
    #set $separator = ':'
    SemiBin concatenate_fasta
        --input-fasta *.fasta
        --output 'output'
        --separator '$separator'
        -m $mode.multi_fasta.min_len
    &&
    ln -s 'output/concatenated.fa' 'contigs.fasta' &&
#end if
]]></token>
<!-- Text parameter (default ":") giving the separator between sample name
     and contig name in a concatenated contig file. -->
<xml name="separator">
    <param type="text" argument="--separator" label="Separator in the contig file between sample name and contig name" value=":"/>
</xml>
<!-- One BAM dataset with the reads mapped to the contigs. -->
<xml name="input-bam-single">
    <param type="data" argument="--input-bam" label="Read mapping to the contigs" help="Sorted BAM files" format="bam"/>
</xml>
<!-- Several BAM datasets (multiple="true") with the reads mapped to the contigs. -->
<xml name="input-bam-multi">
    <param type="data" argument="--input-bam" label="Read mapping to the contigs" help="One file per sample, sorted BAM files" format="bam" multiple="true"/>
</xml>
<!-- Command fragment that symlinks the BAM input(s) into the working
     directory as '<sanitized element identifier>.bam'. In single mode
     input_bam is one dataset; otherwise it is iterated as a list.
     NOTE(review): input_bam is referenced without the "mode." prefix
     although, in this file, the input-bam macros are only expanded inside
     the "mode" conditional. This presumably relies on Galaxy resolving the
     unqualified name; confirm, or qualify it in the tool files' favour.
     NOTE(review): re.sub is called here, so the enclosing command
     presumably imports the re module. -->
<token name="@BAM_FILES@"><![CDATA[
#if $mode.select == 'single'
    #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($input_bam.element_identifier))
    ln -s '$input_bam' '${identifier}.bam' &&
#else
    #for $e in $input_bam
        #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($e.element_identifier))
        ln -s '$e' '${identifier}.bam' &&
    #end for
#end if
]]></token>
<!-- Selector for the reference source: a cached database (default) or a
     pre-computed taxonomy. Used as the test parameter of the "ref" conditionals. -->
<xml name="ref_select">
    <param type="select" name="select" label="Reference database">
        <option selected="true" value="cached">Cached database</option>
        <option value="taxonomy">Pre-computed taxonomy</option>
    </param>
</xml>
<!-- Select list filled from the "gtdb" data table; validation fails with
     an explicit message when the table offers no entry. -->
<xml name="cached_db">
    <param type="select" name="cached_db" label="Cached databases">
        <options from_data_table="gtdb">
            <validator type="no_options" message="No GTDB database is available"/>
        </options>
    </param>
</xml>
<!-- "ref" conditional for one sample: either a cached database or a
     single pre-computed taxonomy annotation table (tabular). -->
<xml name="ref-single">
    <conditional name="ref">
        <expand macro="ref_select"/>
        <when value="cached">
            <expand macro="cached_db"/>
        </when>
        <when value="taxonomy">
            <param type="data" argument="--taxonomy-annotation-table" label="Pre-computed mmseqs2 format taxonomy TSV file" format="tabular"/>
        </when>
    </conditional>
</xml>
<!-- "ref" conditional accepting several taxonomy tables: either a cached
     database or multiple pre-computed taxonomy annotation tables. -->
<xml name="ref-multi">
    <conditional name="ref">
        <expand macro="ref_select"/>
        <when value="cached">
            <expand macro="cached_db"/>
        </when>
        <when value="taxonomy">
            <param type="data" argument="--taxonomy-annotation-table" label="Pre-computed mmseqs2 format taxonomy TSV file" help="One per bin file" format="tabular" multiple="true"/>
        </when>
    </conditional>
</xml>
<!-- Underscore-spelled alias of the "ref-single" macro.
     The two definitions were exact copies of each other; this one now
     delegates to "ref-single" so the "ref" conditional (cached database or
     one pre-computed taxonomy table) is described in a single place and the
     copies cannot drift apart. The macro name is kept so that tools
     expanding "ref_single" keep working and receive identical XML. -->
<xml name="ref_single">
    <expand macro="ref-single"/>
</xml>
<!-- "min_len" conditional choosing how the minimal contig length for
     binning is determined:
     automatic: no extra parameter
     min-len:   explicit integer length
     ratio:     float between 0 and 1 (default 0.05)
     The matching command fragment is the @MIN_LEN@ token. -->
<xml name="min_len">
    <conditional name="min_len">
        <param type="select" name="method" label="Method to set up the minimal length for contigs in binning">
            <option value="automatic">Automatic</option>
            <option value="min-len">Manual</option>
            <option value="ratio">Computation based on ratio of the number of base pairs</option>
        </param>
        <when value="automatic"/>
        <when value="min-len">
            <param type="integer" argument="--min-len" label="Minimal length for contigs in binning" value="0" min="0"/>
        </when>
        <when value="ratio">
            <param type="float" argument="--ratio" label="Ratio of the number of base pairs of contigs between 1000-2500 bp below which the minimal length will be set as 1000bp, otherwise 2500bp." value="0.05" min="0" max="1"/>
        </when>
    </conditional>
</xml>
<!-- Command fragment for the "min_len" conditional: emits the min-len
     option for the manual method, the ratio option for the ratio method,
     and nothing for the automatic method. -->
<token name="@MIN_LEN@"><![CDATA[
#if $min_len.method == 'min-len'
    --min-len $min_len.min_len
#else if $min_len.method == 'ratio'
    --ratio $min_len.ratio
#end if
]]></token>
<!-- Non-negative integer random seed, default 0. -->
<xml name="random-seed">
    <param type="integer" argument="--random-seed" label="Random seed to reproduce result" value="0" min="0"/>
</xml>
<!-- Optional non-negative integer; it has no default value, so the
     parameter may be left empty. -->
<xml name="ml-threshold">
    <param type="integer" argument="--ml-threshold" optional="true" label="Length threshold for generating must-link constraints" help="If no value is given, the threshold is calculated from the contig, and the default minimum value is 4,000 bp." value="" min="0"/>
</xml>
<!-- Number of training epochs (non-negative integer, default 20).
     The macro name and the "epoches" argument spelling are kept as they are,
     because expanding tools and the command line depend on them; only the
     user-facing label is corrected to the standard plural "epochs". -->
<xml name="epoches">
    <param argument="--epoches" type="integer" min="0" value="20" label="Number of epochs used in the training process"/>
</xml>
<!-- Training batch size: non-negative integer, default 2048. -->
<xml name="batch-size">
    <param type="integer" argument="--batch-size" label="Batch size used in the training process" value="2048" min="0"/>
</xml>
<!-- Choice of ORF finder: prodigal (default) or fraggenescan. -->
<xml name="orf-finder">
    <param type="select" argument="--orf-finder" label="ORF finder used to estimate the number of bins">
        <option selected="true" value="prodigal">Prodigal</option>
        <option value="fraggenescan">Fraggenescan</option>
    </param>
</xml>
<!-- Fraction of contigs taken into account for binning: float between
     0 and 1, default 1. The label was ungrammatical ("that considered to
     be binned") and is reworded; name, type, bounds and default are unchanged. -->
<xml name="max-node">
    <param argument="--max-node" type="float" min="0" max="1" value="1" label="Fraction of contigs that are considered for binning"/>
</xml>
<!-- Upper bound on the edges connected to one contig: non-negative
     integer, default 200. -->
<xml name="max-edges">
    <param type="integer" argument="--max-edges" label="Maximum number of edges that can be connected to one contig" value="200" min="0"/>
</xml>
<!-- Optional selector for the built-in model environment. The empty
     value ("None") is selected by default. -->
<xml name="environment">
    <param type="select" argument="--environment" label="Environment for the built-in model" optional="true">
        <option selected="true" value="">None</option>
        <option value="human_gut">Human gut</option>
        <option value="dog_gut">Dog gut</option>
        <option value="ocean">Ocean</option>
        <option value="soil">Soil</option>
        <option value="cat_gut">Cat gut</option>
        <option value="human_oral">Human oral</option>
        <option value="mouse_gut">Mouse gut</option>
        <option value="pig_gut">Pig gut</option>
        <option value="built_environment">Built environment</option>
        <option value="wastewater">Wastewater</option>
        <option value="global">Global</option>
    </param>
</xml>
<!-- Minimum size a bin must reach: non-negative integer, default 200.
     Fixes the "Miminimum" typo in the user-facing label. The unit wording
     "Kbps" is left as it was (presumably kilobase pairs, mirroring the
     "kbs" in the argument name; TODO confirm). -->
<xml name="minfasta-kbs">
    <param argument="--minfasta-kbs" type="integer" min="0" value="200" label="Minimum bin size in Kbps"/>
</xml>
<!-- Boolean flag, unchecked by default: when checked the no-recluster
     option is emitted, otherwise an empty string. -->
<xml name="no-recluster">
    <param type="boolean" argument="--no-recluster" label="Do not recluster bins?" checked="false" truevalue="--no-recluster" falsevalue=""/>
</xml>
<!-- CSV dataset holding the training data. -->
<xml name="data">
    <param type="data" argument="--data" label="Train data" format="csv"/>
</xml>
<!-- Training-data outputs for single and co mode, picked up from
     output/data.csv and output/data_split.csv in the working directory.
     Both are filtered out in multi mode. -->
<xml name="data_output_single">
    <data name="single_data" label="${tool.name} on ${on_string}: Training data" format="csv" from_work_dir="output/data.csv">
        <filter>mode["select"]=="single" or mode["select"]=="co"</filter>
    </data>
    <data name="single_data_split" label="${tool.name} on ${on_string}: Split training data" format="csv" from_work_dir="output/data_split.csv">
        <filter>mode["select"]=="single" or mode["select"]=="co"</filter>
    </data>
</xml>
<!-- Training-data outputs for multi mode: two list collections discovered
     recursively below output/samples/, one element per sub-directory
     (the relative directory part becomes the element designation).
     The dot before "csv" is now escaped so that it only matches a literal
     dot, as a file-name pattern should; files named data.csv and
     data_split.csv are matched exactly as before. -->
<xml name="data_output_multi">
    <collection name="multi_data" type="list" label="${tool.name} on ${on_string}: Training data per sample">
        <filter>mode["select"]=="multi"</filter>
        <discover_datasets pattern="(?P&lt;designation&gt;.*)\/data\.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/>
    </collection>
    <collection name="multi_data_split" type="list" label="${tool.name} on ${on_string}: Split training data per sample">
        <filter>mode["select"]=="multi"</filter>
        <discover_datasets pattern="(?P&lt;designation&gt;.*)\/data_split\.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/>
    </collection>
</xml>
<!-- Optional outputs, each enabled by the binning mode together with a
     value selected in the "extra_output" parameter (defined by the
     expanding tool, not in this file):
     * "coverage": coverage and split-data coverage tables
         single: two plain datasets taken from output/
         co:     two list collections discovered in output/
         multi:  four list collections discovered in output/samples/, two
                 matched on the ".bam_" file-name pattern and two matched
                 recursively per sub-directory
     * "contigs" (multi only): the .fa files in output/samples/
     Changes compared with the previous version:
     * every literal dot in the discovery patterns is escaped; before,
       only the two "co" patterns escaped it and the others let "." match
       any character;
     * "multi_split_cov" was labelled exactly like "multi_split_cov_sample"
       ("Coverage (split data) per sample"), which made the two collections
       indistinguishable in the history. It is now labelled
       "Coverage (split data)", mirroring the multi_cov / multi_cov_sample pair.
     Output names, formats, filters and directories are unchanged. -->
<xml name="generate_sequence_features_extra_outputs">
    <data name="single_cov" format="csv" from_work_dir="output/*_data_cov.csv" label="${tool.name} on ${on_string}: Coverage">
        <filter>mode["select"]=="single" and extra_output and "coverage" in extra_output</filter>
    </data>
    <data name="single_split_cov" format="csv" from_work_dir="output/*_data_split_cov.csv" label="${tool.name} on ${on_string}: Coverage (split data)">
        <filter>mode["select"]=="single" and extra_output and "coverage" in extra_output</filter>
    </data>
    <collection name="co_cov" type="list" label="${tool.name} on ${on_string}: Coverage">
        <filter>mode["select"]=="co" and extra_output and "coverage" in extra_output</filter>
        <discover_datasets pattern=".*\.bam_(?P&lt;designation&gt;.*)_data_cov\.csv" format="csv" directory="output/"/>
    </collection>
    <collection name="co_split_cov" type="list" label="${tool.name} on ${on_string}: Coverage (split data) per sample">
        <filter>mode["select"]=="co" and extra_output and "coverage" in extra_output</filter>
        <discover_datasets pattern=".*\.bam_(?P&lt;designation&gt;.*)_data_split_cov\.csv" format="csv" directory="output/"/>
    </collection>
    <collection name="multi_cov" type="list" label="${tool.name} on ${on_string}: Coverage">
        <filter>mode["select"]=="multi" and extra_output and "coverage" in extra_output</filter>
        <discover_datasets pattern=".*\.bam_(?P&lt;designation&gt;.*)_data_cov\.csv" format="csv" directory="output/samples/"/>
    </collection>
    <collection name="multi_cov_sample" type="list" label="${tool.name} on ${on_string}: Coverage per sample">
        <filter>mode["select"]=="multi" and extra_output and "coverage" in extra_output</filter>
        <discover_datasets pattern="(?P&lt;designation&gt;.*)\/data_cov\.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/>
    </collection>
    <collection name="multi_split_cov" type="list" label="${tool.name} on ${on_string}: Coverage (split data)">
        <filter>mode["select"]=="multi" and extra_output and "coverage" in extra_output</filter>
        <discover_datasets pattern=".*\.bam_(?P&lt;designation&gt;.*)_data_split_cov\.csv" format="csv" directory="output/samples/"/>
    </collection>
    <collection name="multi_split_cov_sample" type="list" label="${tool.name} on ${on_string}: Coverage (split data) per sample">
        <filter>mode["select"]=="multi" and extra_output and "coverage" in extra_output</filter>
        <discover_datasets pattern="(?P&lt;designation&gt;.*)\/data_split_cov\.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/>
    </collection>
    <collection name="multi_contigs" type="list" label="${tool.name} on ${on_string}: Contigs">
        <filter>mode["select"]=="multi" and extra_output and "contigs" in extra_output</filter>
        <discover_datasets pattern="(?P&lt;designation&gt;.*)\.fa" format="fasta" directory="output/samples/"/>
    </collection>
</xml>
<!-- Trained model output, collected from output/model.h5. -->
<xml name="train_output">
    <data name="model" label="${tool.name} on ${on_string}: Semi-supervised deep learning model" format="h5" from_work_dir="output/model.h5"/>
</xml>
<!-- Cannot-link constraints output, collected from output/cannot/cannot.txt. -->
<xml name="cannot_link_output">
    <data name="cannot" label="${tool.name} on ${on_string}: Cannot-link constraints" format="txt" from_work_dir="output/cannot/cannot.txt"/>
</xml>
<!-- Opening section of the help text. The content is reStructuredText, so
     it must stay at column 0 and keep its blank lines. -->
<token name="@HELP_HEADER@"><![CDATA[
What it does
============

SemiBin is a Semi-supervised siamese neural network for metagenomic binning

]]></token>
<!-- Reusable help bullets, one token per kind of input or output
     (contigs, BAM, cannot-link constraints, training data, model, bins).
     Each token holds reStructuredText list items, so the text stays at
     column 0. -->
<token name="@HELP_INPUT_FASTA@"><![CDATA[
- Contigs in fasta for 1 or several samples from single or co-assembly
]]></token>
<token name="@HELP_INPUT_BAM@"><![CDATA[
- BAM with reads mapping to the contigs
]]></token>
<token name="@HELP_CANNOT@"><![CDATA[
- Cannot-link constraints
]]></token>
<token name="@HELP_DATA@"><![CDATA[
- Training data and split training data for the model
]]></token>
<token name="@HELP_MODEL@"><![CDATA[
- Semi-supervised deep learning model
]]></token>
<token name="@HELP_BINS@"><![CDATA[
- Reconstructed bins after reclustering
- Reconstructed bins before reclustering
]]></token>
<!-- Citation block holding one DOI reference. -->
<xml name="citations">
    <citations>
        <citation type="doi">10.1038/s41467-022-29843-y</citation>
    </citations>
</xml>
351 </macros>