comparison kmc_tools_transform.xml @ 0:ca2743037241 draft

"planemo upload for repository https://github.com/usegalaxy-au/tools-au/tree/master/tools/kmc commit 29b98036c21809c923a92feb38b736c007d2e303"
author galaxy-australia
date Tue, 27 Sep 2022 05:20:06 +0000
parents
children c7fda6e88567
comparison
equal deleted inserted replaced
-1:000000000000 0:ca2743037241
1 <tool id="kmc_transform" name="KMC transform" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" >
<!-- Short description shown together with the tool name. -->
2 <description>single KMC's database</description>
<!-- Cross-reference to the bio.tools registry entry "kmc". -->
3 <xrefs>
4 <xref type='bio.tools'>kmc</xref>
5 </xrefs>
<!-- Shared definitions are imported from macros.xml (not shown here). The macros expanded below, and presumably the @TOOL_VERSION@ / @VERSION_SUFFIX@ tokens used in the tool version, come from that file. -->
6 <macros>
7 <import>macros.xml</import>
8 </macros>
9 <expand macro="requirements" />
10 <expand macro="stdio" />
11 <expand macro="version_command" />
<!--
Command template (Cheetah) that builds a single "kmc_tools transform" call.

1. Creates the directory outdir and symlinks every element of input_collection
   into the working directory: an element whose identifier contains ".kmc_suf"
   becomes db.kmc_suf, any other element becomes db.kmc_pre.
2. Runs "kmc_tools -t${GALAXY_SLOTS:-2} transform db" followed by one argument
   group per entry of the "operations" repeat. Every group starts with the
   optional input_opt cutoffs (-ci / -cx, emitted only when the value is not
   empty), then the operation name, then:
     sort, reduce, compact: output database "<operation>_<n>_db", followed by
       the optional output_opt values -ci, -cx and -cs;
     histogram: output file histogram.txt, followed by the optional output_opt
       values -ci (min_kmer_value) and -cx (max_kmer_value);
     dump: the flag -s when output_opt.sort_output is "true", then the output
       file dump.txt;
     set_counts: input_opt.kmer_counts_value, then the output database
       "set_counts_<n>_db".
   <n> is the 1-based position of the operation in the repeat.
3. Copies the results into outdir/: the .kmc_suf and .kmc_pre files of every
   database output, and histogram.txt or dump.txt for the text outputs.

NOTE(review): histogram and dump output names carry no index, so repeating
either operation reuses the same file name. Confirm that this is intended.
NOTE(review): the input_opt cutoffs of the second and later operations are
emitted right after the previous operation's output name. Verify against the
kmc_tools argument syntax that they are interpreted as intended.
-->
12 <command><![CDATA[
13 mkdir outdir &&
14 #for $f in $input_collection:
15 #if '.kmc_suf' in str($f.element_identifier)
16 ln -s '${f}' db.kmc_suf &&
17 #else
18 ln -s '${f}' db.kmc_pre &&
19 #end if
20 #end for
21 kmc_tools
22 -t\${GALAXY_SLOTS:-2}
23 transform
24 db
25 #set $operation_arr=[]
26 #for $i,$opt in enumerate($operations)
27 #silent $operation_arr.append(str($opt.ops.transform_operation))
28 #set $i = $i + 1
29 #if str($opt.ops.transform_operation) == "sort" or str($opt.ops.transform_operation) == "reduce" or str($opt.ops.transform_operation) == "compact":
30 #if str($opt.ops.input_opt.min_kmer_occurrence) != '':
31 -ci'$opt.ops.input_opt.min_kmer_occurrence'
32 #end if
33 #if str($opt.ops.input_opt.max_kmer_occurrence) != '':
34 -cx'$opt.ops.input_opt.max_kmer_occurrence'
35 #end if
36 $opt.ops.transform_operation
37 '${opt.ops.transform_operation}_${i}_db'
38 #if str($opt.ops.output_opt.min_kmer_occurrence) != '':
39 -ci'$opt.ops.output_opt.min_kmer_occurrence'
40 #end if
41 #if str($opt.ops.output_opt.max_kmer_occurrence) != '':
42 -cx'$opt.ops.output_opt.max_kmer_occurrence'
43 #end if
44 #if str($opt.ops.output_opt.max_counter_value) != '':
45 -cs'$opt.ops.output_opt.max_counter_value'
46 #end if
47 #elif str($opt.ops.transform_operation) == "histogram":
48 #if str($opt.ops.input_opt.min_kmer_occurrence) != '':
49 -ci'$opt.ops.input_opt.min_kmer_occurrence'
50 #end if
51 #if str($opt.ops.input_opt.max_kmer_occurrence) != '':
52 -cx'$opt.ops.input_opt.max_kmer_occurrence'
53 #end if
54 $opt.ops.transform_operation
55 '${opt.ops.transform_operation}'.txt
56 #if str($opt.ops.output_opt.min_kmer_value) != '':
57 -ci'$opt.ops.output_opt.min_kmer_value'
58 #end if
59 #if str($opt.ops.output_opt.max_kmer_value) != '':
60 -cx'$opt.ops.output_opt.max_kmer_value'
61 #end if
62 #elif str($opt.ops.transform_operation) == "dump":
63 #if str($opt.ops.input_opt.min_kmer_occurrence) != '':
64 -ci'$opt.ops.input_opt.min_kmer_occurrence'
65 #end if
66 #if str($opt.ops.input_opt.max_kmer_occurrence) != '':
67 -cx'$opt.ops.input_opt.max_kmer_occurrence'
68 #end if
69 $opt.ops.transform_operation
70 #if str($opt.ops.output_opt.sort_output) == "true":
71 -s
72 #end if
73 '${opt.ops.transform_operation}'.txt
74 #elif str($opt.ops.transform_operation) == "set_counts":
75 #if str($opt.ops.input_opt.min_kmer_occurrence) != '':
76 -ci'$opt.ops.input_opt.min_kmer_occurrence'
77 #end if
78 #if str($opt.ops.input_opt.max_kmer_occurrence) != '':
79 -cx'$opt.ops.input_opt.max_kmer_occurrence'
80 #end if
81 $opt.ops.transform_operation
82 $opt.ops.input_opt.kmer_counts_value
83 '${opt.ops.transform_operation}_${i}_db'
84 #end if
85 #end for
86
87 #for $i,$op in enumerate($operation_arr):
88 #set $i = $i + 1
89 #if str($op) == "sort" or str($op) == "reduce" or str($op) == "compact" or str($op) == "set_counts":
90 && cp '${op}_${i}'_db.kmc_suf outdir/
91 && cp '${op}_${i}'_db.kmc_pre outdir/
92 #else if str($op) == "histogram":
93 && cp histogram.txt outdir/
94 #else if str($op) == "dump":
95 && cp '${op}'.txt outdir/
96 #end if
97 #end for
98
99
100 ]]></command>
<!-- Inputs: one list collection holding the files of a KMC database, plus between 1 and 5 transform operations. -->
101 <inputs>
<!-- The command template tells the .kmc_suf file apart from the other element by its element identifier. -->
102 <param name="input_collection" type="data_collection" collection_type="list" label="KMC db"/>
<!-- Each repeat entry selects one operation through the "ops" conditional. The selector and the per-operation options are expanded from macros that are not shown here (presumably defined in macros.xml). -->
103 <repeat name="operations" title="Operations" min="1" max="5">
104 <conditional name="ops">
105 <expand macro="transform_operation"/>
<!-- sort, reduce and compact share the same option macro. -->
106 <when value="sort">
107 <expand macro="transform_option"/>
108 </when>
109 <when value="reduce">
110 <expand macro="transform_option"/>
111 </when>
112 <when value="compact">
113 <expand macro="transform_option"/>
114 </when>
115 <when value="histogram">
116 <expand macro="histogram_option"/>
117 </when>
118 <when value="dump">
119 <expand macro="dump_option"/>
120 </when>
121 <when value="set_counts">
122 <expand macro="set_count_option"/>
123 </when>
124 </conditional>
125 </repeat>
126 </inputs>
<!-- Output: a single list collection built from the files the command copies into outdir/.
     The two discovery rules are mutually exclusive so that every file is matched by exactly one of them:
     KMC database files (.kmc_pre / .kmc_suf) are collected as binary and keep their full file name as
     element identifier; .txt files (histogram, dump) are collected as tabular with the extension removed
     from the identifier. -->
127 <outputs>
128 <collection name="kmc_db" type="list" label="${tool.name} on ${on_string}: kmc outputs">
129 <discover_datasets pattern="(?P&lt;designation&gt;.+\.kmc_(?:pre|suf))" format="binary" directory="outdir/" />
130 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.txt" format="tabular" directory="outdir/" />
131 </collection>
132 </outputs>
133
134 <tests>
135 <test>
136 <!-- #1 Sort a KMC database. Both files of the resulting database "sort_1_db" (.kmc_suf and .kmc_pre) are expected in the output collection.
     NOTE(review): confirm that test-data contains sort_1_db.kmc_pre. -->
137 <param name="input_collection">
138 <collection type="list">
139 <element name="db.kmc_suf" value="db.kmc_suf"/>
140 <element name="db.kmc_pre" value="db.kmc_pre"/>
141 </collection>
142 </param>
143 <conditional name="ops">
144 <param name="transform_operation" value="sort"/>
145 </conditional>
146 <section name="input_opt">
147 <param name="min_kmer_occurrence" value="2"/>
148 <param name="max_kmer_occurrence" value="20"/>
149 </section>
150 <section name="output_opt">
151 <param name="min_kmer_occurrence" value="3"/>
152 <param name="max_kmer_occurrence" value="30"/>
153 </section>
154 <output_collection name="kmc_db" type="list">
155 <element name="sort_1_db.kmc_suf" file="sort_1_db.kmc_suf"/>
156 <element name="sort_1_db.kmc_pre" file="sort_1_db.kmc_pre"/>
157 </output_collection>
158 </test>
159 <test>
160 <!-- #2 Generate a histogram table from a KMC database; expects the tabular element "histogram".
     NOTE(review): the histogram branch of the command reads output_opt.min_kmer_value and output_opt.max_kmer_value,
     while this test sets min_kmer_occurrence / max_kmer_occurrence in output_opt. Verify the parameter names against the histogram_option macro. -->
161 <param name="input_collection">
162 <collection type="list">
163 <element name="db.kmc_suf" value="db.kmc_suf"/>
164 <element name="db.kmc_pre" value="db.kmc_pre"/>
165 </collection>
166 </param>
167 <conditional name="ops">
168 <param name="transform_operation" value="histogram"/>
169 </conditional>
170 <section name="input_opt">
171 <param name="min_kmer_occurrence" value="3"/>
172 <param name="max_kmer_occurrence" value="30"/>
173 </section>
174 <section name="output_opt">
175 <param name="min_kmer_occurrence" value="2"/>
176 <param name="max_kmer_occurrence" value="255"/>
177 </section>
178 <output_collection name="kmc_db" type="list">
179 <element name="histogram" file="histogram.txt" ftype="tabular"/>
180 </output_collection>
181 </test>
182 <test>
183 <!-- #3 Generate a text dump of a KMC database; expects the tabular element "dump".
     NOTE(review): the dump branch of the command reads only output_opt.sort_output, so the output_opt values set
     below may have no effect. Verify against the dump_option macro. -->
184 <param name="input_collection">
185 <collection type="list">
186 <element name="db.kmc_suf" value="db.kmc_suf"/>
187 <element name="db.kmc_pre" value="db.kmc_pre"/>
188 </collection>
189 </param>
190 <conditional name="ops">
191 <param name="transform_operation" value="dump"/>
192 </conditional>
193 <section name="input_opt">
194 <param name="min_kmer_occurrence" value="3"/>
195 <param name="max_kmer_occurrence" value="30"/>
196 </section>
197 <section name="output_opt">
198 <param name="min_kmer_occurrence" value="2"/>
199 <param name="max_kmer_occurrence" value="255"/>
200 </section>
201 <output_collection name="kmc_db" type="list">
202 <element name="dump" file="dump.txt" ftype="tabular"/>
203 </output_collection>
204 </test>
205 <test>
206 <!-- #4 Chain three operations in one run (reduce, sort, compact), each with its own input cutoffs.
     Expects both files (.kmc_suf and .kmc_pre) of every output database, named "<operation>_<position>_db". -->
207 <param name="input_collection">
208 <collection type="list">
209 <element name="db.kmc_suf" value="db.kmc_suf"/>
210 <element name="db.kmc_pre" value="db.kmc_pre"/>
211 </collection>
212 </param>
213 <repeat name="operations">
214 <conditional name="ops">
215 <param name="transform_operation" value="reduce"/>
216 </conditional>
217 <section name="input_opt">
218 <param name="min_kmer_occurrence" value="2"/>
219 <param name="max_kmer_occurrence" value="20"/>
220 </section>
221 </repeat>
222 <repeat name="operations">
223 <conditional name="ops">
224 <param name="transform_operation" value="sort"/>
225 </conditional>
226 <section name="input_opt">
227 <param name="min_kmer_occurrence" value="3"/>
228 <param name="max_kmer_occurrence" value="30"/>
229 </section>
230 </repeat>
231 <repeat name="operations">
232 <conditional name="ops">
233 <param name="transform_operation" value="compact"/>
234 </conditional>
235 <section name="input_opt">
236 <param name="min_kmer_occurrence" value="4"/>
237 <param name="max_kmer_occurrence" value="40"/>
238 </section>
239 </repeat>
240 <output_collection name="kmc_db" type="list">
241 <element name="reduce_1_db.kmc_suf" file="reduce_1_db.kmc_suf"/>
242 <element name="reduce_1_db.kmc_pre" file="reduce_1_db.kmc_pre"/>
243 <element name="sort_2_db.kmc_suf" file="sort_2_db.kmc_suf"/>
244 <element name="sort_2_db.kmc_pre" file="sort_2_db.kmc_pre"/>
245 <element name="compact_3_db.kmc_suf" file="compact_3_db.kmc_suf"/>
246 <element name="compact_3_db.kmc_pre" file="compact_3_db.kmc_pre"/>
247 </output_collection>
248 </test>
249 </tests>
250 <help><![CDATA[
251
252 .. class:: infomark
253
254 **What it does**
255
256 *This operation transforms a single KMC database into one or more KMC databases or text files.*
257
258 **Input**
259
260 - input file - path to databases generated by KMC. KMC generates 2 files with the same name but different extensions (i.e. kmc_suf and kmc_pre).
261
262 **Available Operations:**
263
264 - sort - converts database produced by KMC2.x to KMC1.x database format (which contains k-mers in sorted order)
265 - reduce - exclude too rare and too frequent k-mers
266 - compact - remove counters of k-mers
267 - histogram - produce histogram of k-mers occurrences
268 - dump - produce text dump of kmc database
269 - set_counts <value> - set all k-mer counts to specific value
270
271 **Generate KMC DB**
272
273 - kmc -k27 file.fastq kmers_db kmc_tmp_dir
274
275 **Example 1: split k-mers into a valid and an invalid database**
276
277 *Let's suppose k-mers with occurrences below 11 are erroneous due to sequencing errors. With reduce we can split the k-mer set into one set with valid k-mers and one with invalid k-mers:*
278
279 - kmc_tools transform kmers_db reduce reduce_1_db -cx10 reduce reduce_2_db -ci11 histogram histogram.txt dump dump.txt
280
281 **Example 2: perform all operations**
282
283 - kmc_tools transform kmers_db reduce -ci10 reduce_1_db sort sort_2_db compact compact_3_db histogram histogram.txt dump dump.txt
284
285 **Output**
286
287 *Example 1:*
288
289 - reduce_1_db.kmc_suf
290 - reduce_1_db.kmc_pre
291 - reduce_2_db.kmc_suf
292 - reduce_2_db.kmc_pre
293 - histogram
294 - dump
295
296 *Example 2:*
297
298 - reduce_1_db.kmc_suf
299 - reduce_1_db.kmc_pre
300 - sort_2_db.kmc_suf
301 - sort_2_db.kmc_pre
302 - compact_3_db.kmc_suf
303 - compact_3_db.kmc_pre
304 - histogram
305 - dump
306
307
308
309 .. class:: infomark
310
311 **References**
312
313 More information is available on the `website <https://github.com/refresh-bio/KMC/blob/master/kmc_tools.pdf>`_.
314 ]]></help>
<!-- Publications to cite for KMC. Each DOI citation holds the bare DOI string, without a "DOI:" label, so that it can be resolved. -->
315 <citations>
316 <citation type="doi">10.1093/bioinformatics/btx304</citation>
317 <citation type="doi">10.1093/bioinformatics/btv022</citation>
318 <citation type="doi">10.1186/1471-2105-14-160</citation>
319 </citations>
320 </tool>