Mercurial > repos > iuc > semibin_generate_sequence_features
comparison macros.xml @ 0:07bf5268724f draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4"
author | iuc |
---|---|
date | Fri, 14 Oct 2022 21:45:54 +0000 |
parents | |
children | 0ae1a2636de5 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:07bf5268724f |
---|---|
1 <?xml version="1.0"?> | |
2 <macros> | |
3 <!-- Upstream SemiBin version; reused as the version of the package requirement. --> <token name="@TOOL_VERSION@">1.1.1</token> | |
4 <!-- Presumably the wrapper revision appended to the tool version; its use is not visible in this file. --> <token name="@VERSION_SUFFIX@">0</token> | |
5 <!-- Presumably the Galaxy profile declared by the tools; its use is not visible in this file. --> <token name="@PROFILE@">21.01</token> | |
6 <!-- Cross-reference pointing the tools at the "semibin" entry of the bio.tools registry. --> <xml name="biotools"> | |
7 <xrefs> | |
8 <xref type="bio.tools">semibin</xref> | |
9 </xrefs> | |
10 </xml> | |
11 <!-- Package requirements: semibin at the version held by the TOOL_VERSION token; the yield element lets an expanding tool append further requirements. --> <xml name="requirements"> | |
12 <requirements> | |
13 <requirement type="package" version="@TOOL_VERSION@">semibin</requirement> | |
14 <yield/> | |
15 </requirements> | |
16 </xml> | |
17 <!-- Version command: asks the SemiBin executable to report its version. --> <xml name="version"> | |
18 <version_command>SemiBin -v</version_command> | |
19 </xml> | |
20 <!-- Binning mode conditional with contig and BAM inputs: "single" takes one FASTA and one BAM, "co" takes one FASTA and several BAMs, "multi" takes the multi-sample FASTA choice and several BAMs. --> <xml name="mode_fasta_bam"> | |
21 <conditional name="mode"> | |
22 <expand macro="mode_select"/> | |
23 <when value="single"> | |
24 <expand macro="input-fasta-single"/> | |
25 <expand macro="input-bam-single"/> | |
26 </when> | |
27 <when value="co"> | |
28 <expand macro="input-fasta-single"/> | |
29 <expand macro="input-bam-multi"/> | |
30 </when> | |
31 <when value="multi"> | |
32 <expand macro="input-fasta-multi"/> | |
33 <expand macro="input-bam-multi"/> | |
34 </when> | |
35 </conditional> | |
36 </xml> | |
37 <!-- Binning mode conditional with contig inputs only: "single" and "co" take one FASTA, "multi" takes the multi-sample FASTA choice. --> <xml name="mode_fasta"> | |
38 <conditional name="mode"> | |
39 <expand macro="mode_select"/> | |
40 <when value="single"> | |
41 <expand macro="input-fasta-single"/> | |
42 </when> | |
43 <when value="co"> | |
44 <expand macro="input-fasta-single"/> | |
45 </when> | |
46 <when value="multi"> | |
47 <expand macro="input-fasta-multi"/> | |
48 </when> | |
49 </conditional> | |
50 </xml> | |
51 <!-- Selector for the binning mode; its values match the "when" branches of the mode conditionals. --> <xml name="mode_select"> | |
52 <param type="select" name="select" label="Binning mode"> | |
53 <option selected="true" value="single">Single sample binning (each sample is assembled and binned independently)</option> | |
54 <option value="co">Co-assembly binning (samples are co-assembled together and binned together)</option> | |
55 <option value="multi">Multi-sample binning (multiple samples are assembled and binned individually, but information from multiple samples is used together)</option> | |
56 </param> | |
57 </xml> | |
58 <!-- Single contig FASTA input, plain or gzipped. --> <xml name="input-fasta-single"> | |
59 <param type="data" argument="--input-fasta" label="Contig sequences" format="fasta,fasta.gz"/> | |
60 </xml> | |
61 <!-- Multi-sample contig input: either one already concatenated FASTA plus its separator, or one FASTA per sample plus a minimal contig length. --> <xml name="input-fasta-multi"> | |
62 <conditional name="multi_fasta"> | |
63 <param type="select" name="select" label="Contig files of the samples"> | |
64 <option selected="true" value="concatenated">1 concatenated file (created using the dedicated tool) with all sample contigs </option> | |
65 <option value="multi">1 contig file per sample</option> | |
66 </param> | |
67 <when value="concatenated"> | |
68 <param type="data" argument="--input-fasta" label="Combined contig sequences" format="fasta,fasta.gz"/> | |
69 <expand macro="separator"/> | |
70 </when> | |
71 <when value="multi"> | |
72 <param type="data" argument="--input-fasta" label="Contig sequences" format="fasta,fasta.gz" multiple="true"/> | |
73 <expand macro="concat_min_len"/> | |
74 </when> | |
75 </conditional> | |
76 </xml> | |
77 <!-- Minimal contig length handed to the FASTA concatenation step when one file per sample is given. --> <xml name="concat_min_len"> | |
78 <param type="integer" name="min_len" value="0" min="0" label="Minimal length for contigs to be kept"/> | |
79 </xml> | |
80 <!-- Command snippet staging a top-level input_fasta parameter as contigs.fasta: inputs whose extension ends in .gz are decompressed, all others are symlinked. Every command ends with a trailing logical AND so further commands can follow. --> <token name="@SINGLE_FASTA_FILES@"><![CDATA[ | |
81 #if $input_fasta.ext.endswith(".gz") | |
82 gunzip -c '$input_fasta' > 'contigs.fasta' && | |
83 #else | |
84 ln -s '$input_fasta' 'contigs.fasta' && | |
85 #end if | |
86 ]]></token> | |
87 <!-- Command snippet staging contigs.fasta for every binning mode. single/co: decompress or symlink the FASTA of the mode conditional. multi with a concatenated file: same, and read the separator from the form. multi with one file per sample: stage each file under its sanitised element identifier, run SemiBin concatenate_fasta with ":" as separator and link its output. The separator variable is only set in the multi branch. Uses re.sub, so this assumes the including command imports re (not visible in this file). --> <token name="@FASTA_FILES@"><![CDATA[ | |
88 #if $mode.select == 'single' or $mode.select == 'co' | |
89 #if $mode.input_fasta.ext.endswith(".gz") | |
90 gunzip -c '$mode.input_fasta' > 'contigs.fasta' && | |
91 #else | |
92 ln -s '$mode.input_fasta' 'contigs.fasta' && | |
93 #end if | |
94 #else | |
95 #if $mode.multi_fasta.select == 'concatenated' | |
96 #if $mode.multi_fasta.input_fasta.ext.endswith(".gz") | |
97 gunzip -c '$mode.multi_fasta.input_fasta' > 'contigs.fasta' && | |
98 #else | |
99 ln -s '$mode.multi_fasta.input_fasta' 'contigs.fasta' && | |
100 #end if | |
101 #set $separator = $mode.multi_fasta.separator | |
102 #else | |
103 #for $e in $mode.multi_fasta.input_fasta | |
104 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($e.element_identifier)) | |
105 #if $e.ext.endswith(".gz") | |
106 gunzip -c '$e' > '${identifier}.fasta' && | |
107 #else | |
108 ln -s '$e' '${identifier}.fasta' && | |
109 #end if | |
110 #end for | |
111 #set $separator = ':' | |
112 SemiBin concatenate_fasta | |
113 --input-fasta *.fasta | |
114 --output 'output' | |
115 --separator '$separator' | |
116 -m $mode.multi_fasta.min_len | |
117 && | |
118 ln -s 'output/concatenated.fa' 'contigs.fasta' && | |
119 #end if | |
120 #end if | |
121 ]]></token> | |
122 <!-- Separator between sample name and contig name in a concatenated contig file; defaults to a colon. --> <xml name="separator"> | |
123 <param type="text" argument="--separator" label="Separator in the contig file between sample name and contig name" value=":"/> | |
124 </xml> | |
125 <!-- Single BAM input holding the reads mapped to the contigs. --> <xml name="input-bam-single"> | |
126 <param type="data" argument="--input-bam" label="Read mapping to the contigs" help="Sorted BAM files" format="bam"/> | |
127 </xml> | |
128 <!-- Multiple BAM inputs, one per sample, holding the reads mapped to the contigs. --> <xml name="input-bam-multi"> | |
129 <param type="data" argument="--input-bam" label="Read mapping to the contigs" help="One file per sample, sorted BAM files" format="bam" multiple="true"/> | |
130 </xml> | |
131 <!-- Command snippet symlinking the BAM input(s) under their sanitised element identifiers: one file in single mode, one link per file otherwise. The BAM parameter is addressed by its full path inside the mode conditional, the same way the FASTA snippet addresses the contig input, instead of relying on Galaxy recovering an unqualified nested name. Assumes input_bam sits directly inside the mode conditional (as in mode_fasta_bam) and that the including command imports re. --> <token name="@BAM_FILES@"><![CDATA[ | |
132 #if $mode.select == 'single' | |
133 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($mode.input_bam.element_identifier)) | |
134 ln -s '$mode.input_bam' '${identifier}.bam' && | |
135 #else | |
136 #for $e in $mode.input_bam | |
137 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($e.element_identifier)) | |
138 ln -s '$e' '${identifier}.bam' && | |
139 #end for | |
140 #end if | |
141 ]]></token> | |
142 <!-- Selector between a cached reference database and a pre-computed taxonomy table. --> <xml name="ref_select"> | |
143 <param type="select" name="select" label="Reference database"> | |
144 <option selected="true" value="cached">Cached database</option> | |
145 <option value="taxonomy">Pre-computed taxonomy</option> | |
146 </param> | |
147 </xml> | |
148 <!-- Select filled from the gtdb data table; the validator rejects the form when the table offers no option. --> <xml name="cached_db"> | |
149 <param type="select" name="cached_db" label="Cached databases"> | |
150 <options from_data_table="gtdb"> | |
151 <validator type="no_options" message="No GTDB database is available"/> | |
152 </options> | |
153 </param> | |
154 </xml> | |
155 <!-- Reference choice for a single annotation: a cached database or one pre-computed taxonomy table. --> <xml name="ref-single"> | |
156 <conditional name="ref"> | |
157 <expand macro="ref_select"/> | |
158 <when value="cached"> | |
159 <expand macro="cached_db"/> | |
160 </when> | |
161 <when value="taxonomy"> | |
162 <param type="data" argument="--taxonomy-annotation-table" label="Pre-computed mmseqs2 format taxonomy TSV file" format="tabular"/> | |
163 </when> | |
164 </conditional> | |
165 </xml> | |
166 <!-- Reference choice for several annotations: a cached database or multiple pre-computed taxonomy tables. --> <xml name="ref-multi"> | |
167 <conditional name="ref"> | |
168 <expand macro="ref_select"/> | |
169 <when value="cached"> | |
170 <expand macro="cached_db"/> | |
171 </when> | |
172 <when value="taxonomy"> | |
173 <param type="data" argument="--taxonomy-annotation-table" label="Pre-computed mmseqs2 format taxonomy TSV file" help="One per bin file" format="tabular" multiple="true"/> | |
174 </when> | |
175 </conditional> | |
176 </xml> | |
177 <!-- Underscore-named alias of the ref-single macro, kept for callers using this name; it delegates to ref-single so the two definitions cannot drift apart. --> <xml name="ref_single"> | |
178 <expand macro="ref-single"/> | |
179 </xml> | |
188 <!-- Conditional choosing how the minimal contig length for binning is set: automatically, as a fixed length, or from a base pair ratio. --> <xml name="min_len"> | |
189 <conditional name="min_len"> | |
190 <param type="select" name="method" label="Method to set up the minimal length for contigs in binning"> | |
191 <option value="automatic">Automatic</option> | |
192 <option value="min-len">Manual</option> | |
193 <option value="ratio">Computation based on ratio of the number of base pairs</option> | |
194 </param> | |
195 <when value="automatic"></when> | |
196 <when value="min-len"> | |
197 <param type="integer" argument="--min-len" value="0" min="0" label="Minimal length for contigs in binning"/> | |
198 </when> | |
199 <when value="ratio"> | |
200 <param type="float" argument="--ratio" value="0.05" min="0" max="1" label="Ratio of the number of base pairs of contigs between 1000-2500 bp below which the minimal length will be set as 1000bp, otherwise 2500bp."/> | |
201 </when> | |
202 </conditional> | |
203 </xml> | |
204 <!-- Command snippet turning the min_len conditional into command line options: the fixed length for the "min-len" method, the ratio for the "ratio" method, and nothing for any other method. --> <token name="@MIN_LEN@"><![CDATA[ | |
205 #if $min_len.method == 'min-len' | |
206 --min-len $min_len.min_len | |
207 #else if $min_len.method == 'ratio' | |
208 --ratio $min_len.ratio | |
209 #end if | |
210 ]]></token> | |
211 <!-- Non-negative integer seed, 0 by default, for reproducible results. --> <xml name="random-seed"> | |
212 <param type="integer" argument="--random-seed" value="0" min="0" label="Random seed to reproduce result"/> | |
213 </xml> | |
214 <!-- Optional length threshold for must-link constraints; it is empty by default. --> <xml name="ml-threshold"> | |
215 <param type="integer" argument="--ml-threshold" optional="true" value="" min="0" label="Length threshold for generating must-link constraints" help="If no value is given, the threshold is calculated from the contig, and the default minimum value is 4,000 bp."/> | |
216 </xml> | |
217 <!-- Number of training epochs, 20 by default. The macro name and the argument keep the "epoches" spelling they already have in this file; only the label shown to users is spelled correctly. --> <xml name="epoches"> | |
218 <param argument="--epoches" type="integer" min="0" value="20" label="Number of epochs used in the training process"/> | |
219 </xml> | |
220 <!-- Training batch size, 2048 by default. --> <xml name="batch-size"> | |
221 <param type="integer" argument="--batch-size" value="2048" min="0" label="Batch size used in the training process"/> | |
222 </xml> | |
223 <!-- Choice of ORF finder, Prodigal by default. --> <xml name="orf-finder"> | |
224 <param type="select" argument="--orf-finder" label="ORF finder used to estimate the number of bins"> | |
225 <option selected="true" value="prodigal">Prodigal</option> | |
226 <option value="fraggenescan">Fraggenescan</option> | |
227 </param> | |
228 </xml> | |
229 <!-- Fraction between 0 and 1 (1 by default); the label grammar is corrected, the parameter itself is unchanged. --> <xml name="max-node"> | |
230 <param argument="--max-node" type="float" min="0" max="1" value="1" label="Fraction of contigs considered for binning"/> | |
231 </xml> | |
232 <!-- Upper bound on the edges connected to one contig, 200 by default. --> <xml name="max-edges"> | |
233 <param type="integer" argument="--max-edges" value="200" min="0" label="Maximum number of edges that can be connected to one contig"/> | |
234 </xml> | |
235 <!-- Optional choice of a built-in model environment; the empty "None" option is selected by default. --> <xml name="environment"> | |
236 <param type="select" argument="--environment" label="Environment for the built-in model" optional="true"> | |
237 <option selected="true" value="">None</option> | |
238 <option value="human_gut">Human gut</option> | |
239 <option value="dog_gut">Dog gut</option> | |
240 <option value="ocean">Ocean</option> | |
241 <option value="soil">Soil</option> | |
242 <option value="cat_gut">Cat gut</option> | |
243 <option value="human_oral">Human oral</option> | |
244 <option value="mouse_gut">Mouse gut</option> | |
245 <option value="pig_gut">Pig gut</option> | |
246 <option value="built_environment">Built environment</option> | |
247 <option value="wastewater">Wastewater</option> | |
248 <option value="global">Global</option> | |
249 </param> | |
250 </xml> | |
251 <!-- Minimum bin size, 200 by default; the unit wording of the label is kept and only its misspelled first word is corrected. --> <xml name="minfasta-kbs"> | |
252 <param argument="--minfasta-kbs" type="integer" min="0" value="200" label="Minimum bin size in Kbps"/> | |
253 </xml> | |
254 <!-- Boolean flag, unchecked by default; when checked its value is the option text itself, otherwise an empty string. --> <xml name="no-recluster"> | |
255 <param type="boolean" argument="--no-recluster" checked="false" truevalue="--no-recluster" falsevalue="" label="Do not recluster bins?"/> | |
256 </xml> | |
257 <!-- CSV input holding the training data. --> <xml name="data"> | |
258 <param type="data" argument="--data" label="Train data" format="csv"/> | |
259 </xml> | |
260 <!-- Training data and split training data outputs, emitted only in the single and co binning modes. --> <xml name="data_output_single"> | |
261 <data format="csv" name="single_data" label="${tool.name} on ${on_string}: Training data" from_work_dir="output/data.csv"> | |
262 <filter>mode["select"]=="single" or mode["select"]=="co"</filter> | |
263 </data> | |
264 <data format="csv" name="single_data_split" label="${tool.name} on ${on_string}: Split training data" from_work_dir="output/data_split.csv"> | |
265 <filter>mode["select"]=="single" or mode["select"]=="co"</filter> | |
266 </data> | |
267 </xml> | |
268 <!-- Per-sample training data collections, emitted only in multi mode. Files are discovered recursively under output/samples/ and the sample directory becomes the element designation. The angle brackets of the named group are written as entities so the attribute value is well-formed XML, and the dot of the file name is escaped so it only matches a literal dot. --> <xml name="data_output_multi"> | |
269 <collection name="multi_data" type="list" label="${tool.name} on ${on_string}: Training data per sample"> | |
270 <filter>mode["select"]=="multi"</filter> | |
271 <discover_datasets pattern="(?P&lt;designation&gt;.*)\/data\.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/> | |
272 </collection> | |
273 <collection name="multi_data_split" type="list" label="${tool.name} on ${on_string}: Split training data per sample"> | |
274 <filter>mode["select"]=="multi"</filter> | |
275 <discover_datasets pattern="(?P&lt;designation&gt;.*)\/data_split\.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/> | |
276 </collection> | |
277 </xml> | |
278 <!-- Optional extra outputs, each gated by the binning mode and by the matching entry ("coverage" or "contigs") of the extra_output parameter, which is defined outside this file. Single mode yields two coverage datasets; co and multi modes yield collections whose element designation is captured from the file or directory name. The angle brackets of the named groups are written as entities so the attribute values are well-formed XML, and file name dots are escaped in every pattern so the multi patterns agree with the co patterns. --> <xml name="generate_sequence_features_extra_outputs"> | |
279 <data name="single_cov" format="csv" from_work_dir="output/*_data_cov.csv" label="${tool.name} on ${on_string}: Coverage"> | |
280 <filter>mode["select"]=="single" and extra_output and "coverage" in extra_output</filter> | |
281 </data> | |
282 <data name="single_split_cov" format="csv" from_work_dir="output/*_data_split_cov.csv" label="${tool.name} on ${on_string}: Coverage (split data)"> | |
283 <filter>mode["select"]=="single" and extra_output and "coverage" in extra_output</filter> | |
284 </data> | |
285 <collection name="co_cov" type="list" label="${tool.name} on ${on_string}: Coverage"> | |
286 <filter>mode["select"]=="co" and extra_output and "coverage" in extra_output</filter> | |
287 <discover_datasets pattern=".*\.bam_(?P&lt;designation&gt;.*)_data_cov\.csv" format="csv" directory="output/" /> | |
288 </collection> | |
289 <collection name="co_split_cov" type="list" label="${tool.name} on ${on_string}: Coverage (split data) per sample"> | |
290 <filter>mode["select"]=="co" and extra_output and "coverage" in extra_output</filter> | |
291 <discover_datasets pattern=".*\.bam_(?P&lt;designation&gt;.*)_data_split_cov\.csv" format="csv" directory="output/" /> | |
292 </collection> | |
293 <collection name="multi_cov" type="list" label="${tool.name} on ${on_string}: Coverage"> | |
294 <filter>mode["select"]=="multi" and extra_output and "coverage" in extra_output</filter> | |
295 <discover_datasets pattern=".*\.bam_(?P&lt;designation&gt;.*)_data_cov\.csv" format="csv" directory="output/samples/" /> | |
296 </collection> | |
297 <collection name="multi_cov_sample" type="list" label="${tool.name} on ${on_string}: Coverage per sample"> | |
298 <filter>mode["select"]=="multi" and extra_output and "coverage" in extra_output</filter> | |
299 <discover_datasets pattern="(?P&lt;designation&gt;.*)\/data_cov\.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/> | |
300 </collection> | |
301 <collection name="multi_split_cov" type="list" label="${tool.name} on ${on_string}: Coverage (split data) per sample"> | |
302 <filter>mode["select"]=="multi" and extra_output and "coverage" in extra_output</filter> | |
303 <discover_datasets pattern=".*\.bam_(?P&lt;designation&gt;.*)_data_split_cov\.csv" format="csv" directory="output/samples/" /> | |
304 </collection> | |
305 <collection name="multi_split_cov_sample" type="list" label="${tool.name} on ${on_string}: Coverage (split data) per sample"> | |
306 <filter>mode["select"]=="multi" and extra_output and "coverage" in extra_output</filter> | |
307 <discover_datasets pattern="(?P&lt;designation&gt;.*)\/data_split_cov\.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/> | |
308 </collection> | |
309 <collection name="multi_contigs" type="list" label="${tool.name} on ${on_string}: Contigs"> | |
310 <filter>mode["select"]=="multi" and extra_output and "contigs" in extra_output</filter> | |
311 <discover_datasets pattern="(?P&lt;designation&gt;.*)\.fa" format="fasta" directory="output/samples/" /> | |
312 </collection> | |
313 </xml> | |
314 <!-- Trained model output, taken from output/model.h5 in the working directory. --> <xml name="train_output"> | |
315 <data format="h5" name="model" label="${tool.name} on ${on_string}: Semi-supervised deep learning model" from_work_dir="output/model.h5"/> | |
316 </xml> | |
317 <!-- Cannot-link constraints output, taken from output/cannot/cannot.txt in the working directory. --> <xml name="cannot_link_output"> | |
318 <data format="txt" name="cannot" label="${tool.name} on ${on_string}: Cannot-link constraints" from_work_dir="output/cannot/cannot.txt"/> | |
319 </xml> | |
320 <!-- Help fragment: "What it does" heading with a one-line description of SemiBin. --> <token name="@HELP_HEADER@"><![CDATA[ | |
321 What it does | |
322 ============ | |
323 | |
324 SemiBin is a Semi-supervised siamese neural network for metagenomic binning | |
325 | |
326 ]]></token> | |
327 <!-- Help fragment: list item describing the contig FASTA input. --> <token name="@HELP_INPUT_FASTA@"><![CDATA[ | |
328 - Contigs in fasta for 1 or several samples from single or co-assembly | |
329 ]]></token> | |
330 <!-- Help fragment: list item describing the BAM input. --> <token name="@HELP_INPUT_BAM@"><![CDATA[ | |
331 - BAM with reads mapping to the contigs | |
332 ]]></token> | |
333 <!-- Help fragment: list item describing the cannot-link constraints. --> <token name="@HELP_CANNOT@"><![CDATA[ | |
334 - Cannot-link constraints | |
335 ]]></token> | |
336 <!-- Help fragment: list item describing the training data. --> <token name="@HELP_DATA@"><![CDATA[ | |
337 - Training data and split training data for the model | |
338 ]]></token> | |
339 <!-- Help fragment: list item describing the trained model. --> <token name="@HELP_MODEL@"><![CDATA[ | |
340 - Semi-supervised deep learning model | |
341 ]]></token> | |
342 <!-- Help fragment: list items describing the bins after and before reclustering. --> <token name="@HELP_BINS@"><![CDATA[ | |
343 - Reconstructed bins after reclustering | |
344 - Reconstructed bins before reclustering | |
345 ]]></token> | |
346 <!-- Citation block referencing a single publication by its DOI. --> <xml name="citations"> | |
347 <citations> | |
348 <citation type="doi">10.1038/s41467-022-29843-y</citation> | |
349 </citations> | |
350 </xml> | |
351 </macros> |