Mercurial > repos > jjohnson > drep
comparison macros.xml @ 0:b59ae99e47d4 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/drep commit b155a1d533b7317ceb0ec642ffe3e986117df539"
author | jjohnson |
---|---|
date | Mon, 06 Jan 2020 11:11:06 -0500 |
parents | |
children | 7e2debc267eb |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b59ae99e47d4 |
---|---|
1 <macros> | |
2 <!-- Version string substituted into the drep package requirement's version attribute. --> <token name="@VERSION@">2.3.2</token> | |
3 <!-- Package requirement for drep pinned to @VERSION@; the yield slot accepts extra requirements from the expanding tool. --> <xml name="requirements"> | |
4 <requirements> | |
5 <requirement version="@VERSION@" type="package">drep</requirement> | |
6 <yield /> | |
7 </requirements> | |
8 </xml> | |
9 <!-- Citation list holding one DOI entry; the yield slot accepts further citations from the expanding tool. --> <xml name="citations"> | |
10 <citations> | |
11 <citation type="doi">10.1038/ismej.2017.126</citation> | |
12 <yield/> | |
13 </citations> | |
14 </xml> | |
15 | |
16 | |
17 <!-- Multiple-dataset FASTA input exposed to the templates as $genomes. --> <xml name="genomes"> | |
18 <param type="data" format="fasta" multiple="true" argument="--genomes" label="genomes fasta files"/> | |
19 </xml> | |
20 <!-- Symlinks each selected genome under a sanitized name (characters outside word chars, dash, underscore and dot become underscores) and records the names in $genomefiles for @GENOMES@. The append runs under #silent so it is a pure side effect and contributes nothing to the command text. --> <token name="@PREPARE_GENOMES@"><![CDATA[ | |
21 #import re | |
22 #set $genomefiles = [] | |
23 #for $genome in $genomes | |
24 #set $input_name = $re.sub('[^\w\-_.]', '_',str($genome.element_identifier.split('/')[-1])) | |
25 ln -s '${genome}' '${input_name}' && | |
26 #silent $genomefiles.append($input_name) | |
27 #end for | |
28 ]]></token> | |
29 <!-- Emits -g followed by every single-quoted name stored in $genomefiles. --> <token name="@GENOMES@"><![CDATA[ | |
30 -g | |
31 #for $linked_name in $genomefiles | |
32 '${linked_name}' | |
33 #end for | |
34 ]]></token> | |
35 | |
36 | |
37 <!-- Optional select for the checkM workflow; neither option is preselected. --> <xml name="checkm_method"> | |
38 <param type="select" optional="true" argument="--checkM_method" label="checkm method"> | |
39 <option value="lineage_wf">lineage_wf (more accurate)</option> | |
40 <option value="taxonomy_wf">taxonomy_wf (faster)</option> | |
41 </param> | |
42 </xml> | |
43 <!-- Adds the checkM method flag only when a method has been chosen. --> <token name="@CHECKM_METHOD@"><![CDATA[ | |
44 #if $checkM_method | |
45 --checkM_method ${checkM_method} | |
46 #end if | |
47 ]]></token> | |
48 | |
49 <!-- Filter settings: length, completeness and contamination thresholds plus a nested choice of where genome quality comes from (checkM run, user-supplied CSV, or ignored). The nested selector is declared with a plain name because it is a UI switch, not a command-line flag. --> <xml name="filtering_options"> | |
50 <conditional name="filter"> | |
51 <param name="set_options" type="select" label="set filtering options"> | |
52 <option value="yes">Yes</option> | |
53 <option value="no" selected="true">No</option> | |
54 </param> | |
55 <when value="yes"> | |
56 <param argument="--length" type="integer" value="50000" label="Minimum genome length"/> | |
57 <param argument="--completeness" type="integer" value="75" min="0" max="100" label="Minimum genome completeness percent"/> | |
58 <param argument="--contamination" type="integer" value="25" min="0" max="100" label="Maximum genome contamination percent"/> | |
59 | |
60 <conditional name="quality"> | |
61 <param name="source" type="select" label="genome quality"> | |
62 <help> | |
63 --ignoreGenomeQuality is useful with | |
64 bacteriophages or eukaryotes or things where checkM | |
65 scoring does not work. Will only choose genomes based | |
66 on length and N50. | |
67 </help> | |
68 <option value="checkm" selected="true">Run checkM</option> | |
69 <option value="genomeInfo">User supplied genomeInfo csv file</option> | |
70 <option value="ignoreGenomeQuality">--ignoreGenomeQuality (NOT RECOMMENDED!)</option> | |
71 </param> | |
72 <when value="checkm"> | |
73 <param argument="--checkM_method" type="select" label="checkm method" optional="true"> | |
74 <option value="lineage_wf">lineage_wf (more accurate)</option> | |
75 <option value="taxonomy_wf">taxonomy_wf (faster)</option> | |
76 </param> | |
77 </when> | |
78 <when value="genomeInfo"> | |
79 <param argument="--genomeInfo" type="data" format="csv" label="genome information csv file"> | |
80 <help><![CDATA[ | |
81 A CSV dataset that must contain: [ | |
82 "genome"(history dataset name of .fasta dataset of that genome), | |
83 "completeness"(0-100 value for completeness of the genome), | |
84 "contamination"(0-100 value of the contamination of the genome)] | |
85 ]]></help> | |
86 </param> | |
87 </when> | |
88 <when value="ignoreGenomeQuality"/> | |
89 </conditional> | |
90 | |
91 </when> | |
92 <when value="no"/> | |
93 </conditional> | |
94 </xml> | |
95 <!-- Command fragment for the filter conditional: the three thresholds plus at most one genome-quality option. The checkM method select is optional, so its flag is emitted only when a value was chosen; the genomeInfo dataset path is single-quoted for the shell. --> <token name="@FILTER_OPTIONS@"><![CDATA[ | |
96 #if $filter.set_options == 'yes': | |
97 --length $filter.length | |
98 --completeness $filter.completeness | |
99 --contamination $filter.contamination | |
100 #if $filter.quality.source == 'checkm' and $filter.quality.checkM_method | |
101 --checkM_method $filter.quality.checkM_method | |
102 #elif $filter.quality.source == 'genomeInfo' | |
103 --genomeInfo '$filter.quality.genomeInfo' | |
104 #elif $filter.quality.source == 'ignoreGenomeQuality' | |
105 --ignoreGenomeQuality | |
106 #end if | |
107 #end if | |
108 ]]></token> | |
109 | |
110 <!-- Genome comparison settings (MASH sketch size, secondary clustering algorithm, nucmer preset), shown only when the user opts in. --> <xml name="genome_comparison_options"> | |
111 <conditional name="genome_comparison"> | |
112 <param name="set_options" type="select" label="set genome comparison options"> | |
113 <option value="yes">Yes</option> | |
114 <option value="no" selected="true">No</option> | |
115 </param> | |
116 <when value="yes"> | |
117 <param argument="--MASH_sketch" type="integer" value="1000" label="MASH sketch size"/> | |
118 <param argument="--S_algorithm" type="select" label="Algorithm for secondary clustering comparisons"> | |
119 <option value="ANImf" selected="true">ANImf = (RECOMMENDED) Align whole genomes with nucmer; filter alignment; compare aligned regions</option> | |
120 <option value="ANIn">ANIn = Align whole genomes with nucmer; compare aligned regions</option> | |
121 <option value="gANI">gANI = Identify and align ORFs; compare aligned ORFS</option> | |
122 </param> | |
123 <param argument="-n_PRESET" type="select" label="Presets to pass to nucmer"> | |
124 <option value="normal" selected="true">normal = default ANIn parameters (default: normal)</option> | |
125 <option value="tight">tight = only align highly conserved regions</option> | |
126 </param> | |
127 </when> | |
128 <when value="no"/> | |
129 </conditional> | |
130 </xml> | |
131 <!-- Emits the three genome comparison options when the conditional is set to yes; otherwise emits nothing. --> <token name="@GENOME_COMPARISON_OPTIONS@"><![CDATA[ | |
132 #if $genome_comparison.set_options == 'yes' | |
133 --MASH_sketch ${genome_comparison.MASH_sketch} | |
134 --S_algorithm ${genome_comparison.S_algorithm} | |
135 -n_PRESET ${genome_comparison.n_PRESET} | |
136 #end if | |
137 ]]></token> | |
138 | |
139 <!-- Clustering settings: primary and secondary ANI thresholds, skip switches, coverage threshold and method, and the linkage algorithm; shown only when the user opts in. --> <xml name="clustering_options"> | |
140 <conditional name="clustering"> | |
141 <param name="set_options" type="select" label="set clustering options"> | |
142 <option value="yes">Yes</option> | |
143 <option value="no" selected="true">No</option> | |
144 </param> | |
145 <when value="yes"> | |
146 <param argument="--P_ani" type="float" value="0.9" min="0." max="1." label="ANI threshold to form primary (MASH) clusters"/> | |
147 <param argument="--S_ani" type="float" value="0.99" min="0." max="1." label="ANI threshold to form secondary clusters"/> | |
148 | |
149 <param argument="--SkipMash" type="boolean" truevalue="--SkipMash" falsevalue="" checked="false" label="Skip MASH clustering, just do secondary clustering on all genomes"/> | |
150 <param argument="--SkipSecondary" type="boolean" truevalue="--SkipSecondary" falsevalue="" checked="false" label="Skip secondary clustering, just perform MASH clustering"/> | |
151 <param argument="--cov_thresh" type="float" value="0.1" min="0." max="1." label="Minimum level of overlap between genomes when doing secondary comparisons"/> | |
152 <param argument="--coverage_method" type="select" label="Method to calculate coverage of an alignment"> | |
153 <help>(for ANIn/ANImf only; gANI can only do larger method)</help> | |
154 <option value="larger" selected="true">larger = max((aligned length / genome 1), (aligned_length / genome2))</option> | |
155 <option value="total">total = 2*(aligned length) / (sum of total genome lengths)</option> | |
156 </param> | |
157 <param argument="--clusterAlg" type="select" label="Algorithm used to cluster genomes"> | |
158 <help>(passed to scipy.cluster.hierarchy.linkage)</help> | |
159 <option value="average" selected="true">average</option> | |
160 </param> | |
161 </when> | |
162 <when value="no"/> | |
163 </conditional> | |
164 </xml> | |
165 <!-- Emits the clustering options when the conditional is set to yes; the two boolean params expand to their flag text or to an empty string. --> <token name="@CLUSTERING_OPTIONS@"><![CDATA[ | |
166 #if $clustering.set_options == 'yes' | |
167 --P_ani ${clustering.P_ani} | |
168 --S_ani ${clustering.S_ani} | |
169 ${clustering.SkipMash} | |
170 ${clustering.SkipSecondary} | |
171 --cov_thresh ${clustering.cov_thresh} | |
172 --coverage_method ${clustering.coverage_method} | |
173 --clusterAlg ${clustering.clusterAlg} | |
174 #end if | |
175 ]]></token> | |
176 | |
177 <!-- Weights for the genome scoring formula quoted in the first parameter's help; shown only when the user opts in. --> <xml name="scoring_options"> | |
178 <conditional name="scoring"> | |
179 <param type="select" name="set_options" label="set scoring options"> | |
180 <option value="yes">Yes</option> | |
181 <option selected="true" value="no">No</option> | |
182 </param> | |
183 <when value="yes"> | |
184 <param type="float" argument="--completeness_weight" value="1" label="completeness weight"> | |
185 <help> | |
186 Based off of the formula: | |
187 A*Completeness - B*Contamination + C*(Contamination * (strain_heterogeneity/100)) + D*log(N50) + E*log(size) | |
188 A = completeness_weight; B = contamination_weight; C = strain_heterogeneity_weight; D = N50_weight; E = size_weight; | |
189 </help> | |
190 </param> | |
191 <param type="float" argument="--contamination_weight" value="5" label="contamination weight"/> | |
192 <param type="float" argument="--strain_heterogeneity_weight" value="1" min="0." max="1." label="strain heterogeneity weight"/> | |
193 <param type="float" argument="--N50_weight" value=".5" label="weight of log(genome N50)"/> | |
194 <param type="float" argument="--size_weight" value="0" label="weight of log(genome size)"/> | |
195 </when> | |
196 <when value="no"/> | |
197 </conditional> | |
198 </xml> | |
199 <!-- Emits the five scoring weights when the conditional is set to yes; otherwise emits nothing. --> <token name="@SCORING_OPTIONS@"><![CDATA[ | |
200 #if $scoring.set_options == 'yes' | |
201 --completeness_weight ${scoring.completeness_weight} | |
202 --contamination_weight ${scoring.contamination_weight} | |
203 --strain_heterogeneity_weight ${scoring.strain_heterogeneity_weight} | |
204 --N50_weight ${scoring.N50_weight} | |
205 --size_weight ${scoring.size_weight} | |
206 #end if | |
207 ]]></token> | |
208 | |
209 <!-- Taxonomy settings (method, minimum percent, centrifuge index), shown only when taxonomy generation is switched on. NOTE(review): cent_index declares an empty format attribute; confirm the intended datatype. --> <xml name="taxonomy_options"> | |
210 <conditional name="taxonomy"> | |
211 <param name="set_options" type="select" label="generate taxonomy information"> | |
212 <option value="yes">Yes</option> | |
213 <option value="no" selected="true">No</option> | |
214 </param> | |
215 <when value="yes"> | |
216 <param argument="--tax_method" type="select" label="Method of determining taxonomy"> | |
217 <help>(the percent method uses the minimum percent set below)</help> | |
218 <option value="percent" selected="true">percent = The most descriptive taxonomic level with at least (per) hits</option> | |
219 <option value="max">max = The centrifuge taxonomic level with the most overall hits</option> | |
220 </param> | |
221 <param argument="--percent" type="float" value="50" min="0" max="100" label="minimum percent for percent method"/> | |
222 <param argument="--cent_index" type="data" format="" label="centrifuge index"/> | |
223 </when> | |
224 <when value="no"/> | |
225 </conditional> | |
226 </xml> | |
227 <!-- Adds the run_tax switch and its companion options when taxonomy is enabled; the index dataset path is single-quoted for the shell. --> <token name="@TAXONOMY_OPTIONS@"><![CDATA[ | |
228 #if $taxonomy.set_options == 'yes': | |
229 --run_tax | |
230 --tax_method $taxonomy.tax_method | |
231 --percent $taxonomy.percent | |
232 --cent_index '$taxonomy.cent_index' | |
233 #end if | |
234 ]]></token> | |
235 | |
236 <!-- Warning thresholds (distance, similarity, aligned fraction), each limited to the 0-1 range; shown only when the user opts in. --> <xml name="warning_options"> | |
237 <conditional name="warning"> | |
238 <param type="select" name="set_options" label="set warning options"> | |
239 <option value="yes">Yes</option> | |
240 <option selected="true" value="no">No</option> | |
241 </param> | |
242 <when value="yes"> | |
243 <param type="float" argument="--warn_dist" value="0.25" min="0" max="1" label="How far from the threshold to throw cluster warnings"/> | |
244 <param type="float" argument="--warn_sim" value="0.98" min="0" max="1" label="Similarity threshold for warnings between dereplicated genomes"/> | |
245 <param type="float" argument="--warn_aln" value="0.25" min="0" max="1" label="Minimum aligned fraction for warnings between dereplicated genomes (ANIn)"/> | |
246 </when> | |
247 <when value="no"/> | |
248 </conditional> | |
249 </xml> | |
250 <!-- Emits the three warning thresholds when the conditional is set to yes; otherwise emits nothing. --> <token name="@WARNING_OPTIONS@"><![CDATA[ | |
251 #if $warning.set_options == 'yes' | |
252 --warn_dist ${warning.warn_dist} | |
253 --warn_sim ${warning.warn_sim} | |
254 --warn_aln ${warning.warn_aln} | |
255 #end if | |
256 ]]></token> | |
257 | |
258 <!-- Placeholder macro with no content; it currently expands to nothing. --> <xml name="select_outputs"> | |
259 </xml> | |
260 | |
261 <!-- Outputs collected from fixed paths under outdir/ in the job working directory: the run log, the warnings file and four PDF figures. Each output has its own label so the log and warnings datasets can be told apart in the history. --> <xml name="common_outputs"> | |
262 <data name="log" format="txt" label="${tool.name} on ${on_string}: Log" from_work_dir="outdir/log/logger.log"/> | |
263 <data name="warnings" format="txt" label="${tool.name} on ${on_string}: Warnings" from_work_dir="outdir/log/warnings.txt"/> | |
264 <data name="Primary_clustering_dendrogram" format="pdf" label="${tool.name} on ${on_string}: Primary_clustering_dendrogram.pdf" from_work_dir="outdir/figures/Primary_clustering_dendrogram.pdf"/> | |
265 <data name="Secondary_clustering_dendrograms" format="pdf" label="${tool.name} on ${on_string}: Secondary_clustering_dendrograms.pdf" from_work_dir="outdir/figures/Secondary_clustering_dendrograms.pdf"/> | |
266 <data name="Secondary_clustering_MDS" format="pdf" label="${tool.name} on ${on_string}: Secondary_clustering_MDS.pdf" from_work_dir="outdir/figures/Secondary_clustering_MDS.pdf"/> | |
267 <data name="Clustering_scatterplots" format="pdf" label="${tool.name} on ${on_string}: Clustering_scatterplots.pdf" from_work_dir="outdir/figures/Clustering_scatterplots.pdf"/> | |
268 </xml> | |
269 <!-- Placeholder macro with no content; it currently expands to nothing. --> <xml name="common_outputs2"> | |
270 </xml> | |
271 | |
272 <token name="@GENOMES_HELP@"><![CDATA[ | |
273 I/O PARAMETERS: | |
274 -g [GENOMES [GENOMES ...]], --genomes [GENOMES [GENOMES ...]] | |
275 genomes to cluster in .fasta format (default: None) | |
276 ]]></token> | |
277 | |
278 <token name="@FILTERING_HELP@"><![CDATA[ | |
279 FILTERING OPTIONS: | |
280 -l LENGTH, --length LENGTH | |
281 Minimum genome length (default: 50000) | |
282 -comp COMPLETENESS, --completeness COMPLETENESS | |
283 Minimum genome completeness (default: 75) | |
284 -con CONTAMINATION, --contamination CONTAMINATION | |
285 Maximum genome contamination (default: 25) | |
286 --ignoreGenomeQuality | |
287 Don't run checkM or do any quality filtering. NOT | |
288 RECOMMENDED! This is useful for use with | |
289 bacteriophages or eukaryotes or things where checkM | |
290 scoring does not work. Will only choose genomes based | |
291 on length and N50 (default: False) | |
292 | |
293 | |
294 ]]></token> | |
295 | |
296 <token name="@GENOME_COMPARISON_HELP@"><![CDATA[ | |
297 GENOME COMPARISON PARAMETERS: | |
298 -ms MASH_SKETCH, --MASH_sketch MASH_SKETCH | |
299 MASH sketch size (default: 1000) | |
300 --S_algorithm {goANI,ANIn,ANImf,gANI} | |
301 Algorithm for secondary clustering comparisons: | |
302 ANImf = (RECOMMENDED) Align whole genomes with nucmer; filter alignment; compare aligned regions | |
303 ANIn = Align whole genomes with nucmer; compare aligned regions | |
304 gANI = Identify and align ORFs; compare aligned ORFS | |
305 (default: ANImf) | |
306 -n_PRESET {normal,tight} | |
307 Presets to pass to nucmer | |
308 tight = only align highly conserved regions | |
309 normal = default ANIn parameters (default: normal) | |
310 | |
311 ]]></token> | |
312 | |
313 <token name="@CLUSTERING_HELP@"><![CDATA[ | |
314 CLUSTERING PARAMETERS: | |
315 -pa P_ANI, --P_ani P_ANI | |
316 ANI threshold to form primary (MASH) clusters | |
317 (default: 0.9) | |
318 -sa S_ANI, --S_ani S_ANI | |
319 ANI threshold to form secondary clusters (default: | |
320 0.99) | |
321 --SkipMash Skip MASH clustering, just do secondary clustering on | |
322 all genomes (default: False) | |
323 --SkipSecondary Skip secondary clustering, just perform MASH | |
324 clustering (default: False) | |
325 -nc COV_THRESH, --cov_thresh COV_THRESH | |
326 Minimum level of overlap between genomes when doing | |
327 secondary comparisons (default: 0.1) | |
328 -cm {total,larger}, --coverage_method {total,larger} | |
329 Method to calculate coverage of an alignment | |
330 (for ANIn/ANImf only; gANI can only do larger method) | |
331 total = 2*(aligned length) / (sum of total genome lengths) | |
332 larger = max((aligned length / genome 1), (aligned_length / genome2)) | |
333 (default: larger) | |
334 --clusterAlg CLUSTERALG | |
335 Algorithm used to cluster genomes (passed to | |
336 scipy.cluster.hierarchy.linkage) (default: average) | |
337 | |
338 ]]></token> | |
339 | |
340 <token name="@SCORING_HELP@"><![CDATA[ | |
341 SCORING CRITERIA | |
342 Based off of the formula: | |
343 A*Completeness - B*Contamination + C*(Contamination * (strain_heterogeneity/100)) + D*log(N50) + E*log(size) | |
344 | |
345 A = completeness_weight; B = contamination_weight; C = strain_heterogeneity_weight; D = N50_weight; E = size_weight: | |
346 -comW COMPLETENESS_WEIGHT, --completeness_weight COMPLETENESS_WEIGHT | |
347 completeness weight (default: 1) | |
348 -conW CONTAMINATION_WEIGHT, --contamination_weight CONTAMINATION_WEIGHT | |
349 contamination weight (default: 5) | |
350 -strW STRAIN_HETEROGENEITY_WEIGHT, --strain_heterogeneity_weight STRAIN_HETEROGENEITY_WEIGHT | |
351 strain heterogeneity weight (default: 1) | |
352 -N50W N50_WEIGHT, --N50_weight N50_WEIGHT | |
353 weight of log(genome N50) (default: 0.5) | |
354 -sizeW SIZE_WEIGHT, --size_weight SIZE_WEIGHT | |
355 weight of log(genome size) (default: 0) | |
356 | |
357 ]]></token> | |
358 | |
359 <token name="@TAXONOMY_HELP@"><![CDATA[ | |
360 TAXONOMY: | |
361 --run_tax generate taxonomy information (Tdb) (default: False) | |
362 --tax_method {percent,max} | |
363 Method of determining taxonomy | |
364 percent = The most descriptive taxonomic level with at least (per) hits | |
365 max = The centrifuge taxonomic level with the most overall hits (default: percent) | |
366 -per PERCENT, --percent PERCENT | |
367 minimum percent for percent method (default: 50) | |
368 --cent_index CENT_INDEX | |
369 path to centrifuge index (for example, | |
370 /home/mattolm/download/centrifuge/indices/b+h+v) | |
371 (default: None) | |
372 | |
373 ]]></token> | |
374 | |
375 <token name="@WARNINGS_HELP@"><![CDATA[ | |
376 WARNINGS: | |
377 --warn_dist WARN_DIST | |
378 How far from the threshold to throw cluster warnings | |
379 (default: 0.25) | |
380 --warn_sim WARN_SIM Similarity threshold for warnings between dereplicated | |
381 genomes (default: 0.98) | |
382 --warn_aln WARN_ALN Minimum aligned fraction for warnings between | |
383 dereplicated genomes (ANIn) (default: 0.25) | |
384 | |
385 ]]></token> | |
386 | |
387 | |
388 </macros> |