comparison catWrapper.xml @ 3:62aebaf6cfa0 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/concat_multi_datasets commit 64e9762ab35b04bb0d151e441baa2fae8bf2cb4a
author artbio
date Fri, 10 May 2019 10:15:02 -0400
parents 1fe4d165ac0e
children 7afc0515a307
comparison
equal deleted inserted replaced
2:1fe4d165ac0e 3:62aebaf6cfa0
1 <tool id="cat_multi_datasets" name="Concatenate multiple datasets" version="1.1.0"> 1 <tool id="cat_multi_datasets" name="Concatenate multiple datasets" version="1.2.0">
2 <description>tail-to-head by specifying how</description> 2 <description>tail-to-head by specifying how</description>
3 <command><![CDATA[ 3 <command><![CDATA[
4 #if $headers == 0: 4 #if $headers == 0:
5 #set $concat_command = "cat" 5 #set $concat_command = "cat"
6 #else: 6 #else:
23 $concat_command "$file" >> '$out_file1' && 23 $concat_command "$file" >> '$out_file1' &&
24 #end if 24 #end if
25 #end for 25 #end for
26 sleep 1 26 sleep 1
27 #end if 27 #end if
28 #else if $global_condition.input_type == "simple_collections":
29 #if $global_condition.collections_condition.collection_cat_type == "two_collections":
30 mkdir concatenated &&
31 #if $dataset_names == "No":
32 #for $x, $y in zip($global_condition.collections_condition.input_1, $global_condition.collections_condition.input_2)
33 $concat_command '$x' '$y' > concatenated/'${x.element_identifier}.listed.${x.ext}.listed' &&
34 #end for
35 sleep 1
36 #else:
37 #for $x, $y in zip($global_condition.collections_condition.input_1, $global_condition.collections_condition.input_2)
38 #if $x.ext[-2:] == "gz":
39 printf "# ${x.element_identifier}\n" | gzip -c > concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' &&
40 gzip -dc '$x' | $concat_command | gzip -c >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' &&
41 printf "# ${y.element_identifier}\n" | gzip -c >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' &&
42 gzip -dc '$y' | $concat_command | gzip -c >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' &&
43 #else:
44 printf "# ${x.element_identifier}\n" > concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' &&
45 $concat_command '$x'>> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' &&
46 printf "# ${y.element_identifier}\n" >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' &&
47 $concat_command '$y' >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' &&
48 #end if
49 #end for
50 sleep 1
51 #end if
52 #end if
28 #else if $global_condition.input_type == "paired_collection": 53 #else if $global_condition.input_type == "paired_collection":
29 #if $global_condition.paired_cat_type == "by_strand": 54 #if $global_condition.paired_cat_type == "by_strand":
30 #if $dataset_names == "No": 55 #if $dataset_names == "No":
31 #for $file in $global_condition.inputs 56 #for $file in $global_condition.inputs
32 $concat_command 57 $concat_command
105 </command> 130 </command>
106 <inputs> 131 <inputs>
107 <conditional name="global_condition"> 132 <conditional name="global_condition">
108 <param name="input_type" type="select" label="What type of data do you wish to concatenate?" help="Depending on the type of input selected the concatenation options will differ"> 133 <param name="input_type" type="select" label="What type of data do you wish to concatenate?" help="Depending on the type of input selected the concatenation options will differ">
109 <option value="singles">Single datasets</option> 134 <option value="singles">Single datasets</option>
135 <option value="simple_collections">Collections</option>
110 <option value="paired_collection">Paired collection</option> 136 <option value="paired_collection">Paired collection</option>
111 </param> 137 </param>
112 <when value="singles"> 138 <when value="singles">
113 <param name="inputs" type="data" label="Concatenate Datasets" multiple="True" help="All inputed datasets will be concatenated tail-to-head."/> 139 <param name="inputs" type="data" label="Concatenate Datasets" multiple="True" help="All inputed datasets will be concatenated tail-to-head."/>
114 </when> 140 </when>
115 <when value="paired_collection"> 141 <when value="paired_collection">
116 <param name="inputs" type="data_collection" collection_type="list:paired" label="Input paired collections to concatenate"/> 142 <param name="inputs" type="data_collection" collection_type="list:paired" label="Input paired collection to concatenate"/>
117 <param name="paired_cat_type" type="select" label="What type of concatenation do you wish to perform?"> 143 <param name="paired_cat_type" type="select" label="What type of concatenation do you wish to perform?">
118 <option value="by_strand">Concatenate all datsets of same strand (outputs a single pair of datasets)</option> 144 <option value="by_strand">Concatenate all datsets of same strand (outputs a single pair of datasets)</option>
119 <option value="by_pair">Concatenate pairs of datasets (outputs an unpaired collection of datasets)</option> 145 <option value="by_pair">Concatenate pairs of datasets (outputs an unpaired collection of datasets)</option>
120 <option value="all">Concatenate all datasets into a single file regardless of strand (outputs a single file)</option> 146 <option value="all">Concatenate all datasets into a single file regardless of strand (outputs a single file)</option>
121 </param> 147 </param>
148 </when>
149 <when value="simple_collections">
150 <conditional name="collections_condition">
151 <param name="collection_cat_type" type="select" label="What type of concatenation do you wish to perform?">
152 <option value="two_collections">Concatenate datasets of 2 collections (outputs a simple collection)</option>
153 </param>
154 <when value="two_collections">
155 <param name="input_1" type="data_collection" collection_type="list" label="Input first collection" help="The first collection contains the datasets that will be written first in the concatenated file" />
156 <param name="input_2" type="data_collection" collection_type="list" label="Input second collection" help="The second collection contains the datasets that will be written last in the concatenated file" />
157 </when>
158 </conditional>
122 </when> 159 </when>
123 </conditional> 160 </conditional>
124 <param name="dataset_names" type="boolean" label="Include dataset names?" truevalue="Yes" falsevalue="No" checked="false" help="If 'Yes' is selected '#name of dataset' will be added when concatenating."/> 161 <param name="dataset_names" type="boolean" label="Include dataset names?" truevalue="Yes" falsevalue="No" checked="false" help="If 'Yes' is selected '#name of dataset' will be added when concatenating."/>
125 <param name="headers" type="integer" label="Number of lines to skip at the beginning of each concatenation:" value="0" help="This paremeter exists so as to not concatenate comments or headers contained at the start of the files."/> 162 <param name="headers" type="integer" label="Number of lines to skip at the beginning of each concatenation:" value="0" help="This paremeter exists so as to not concatenate comments or headers contained at the start of the files."/>
126 </inputs> 163 </inputs>
133 <data name="reverse" /> 170 <data name="reverse" />
134 <filter>global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_strand'</filter> 171 <filter>global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_strand'</filter>
135 </collection> 172 </collection>
136 <collection name="list_output" type="list" label="Concatenation by pairs"> 173 <collection name="list_output" type="list" label="Concatenation by pairs">
137 <discover_datasets pattern="(?P&lt;name&gt;.*)\.listed\.(?P&lt;ext&gt;.*)\.listed" visible="false" directory="concatenated"/> 174 <discover_datasets pattern="(?P&lt;name&gt;.*)\.listed\.(?P&lt;ext&gt;.*)\.listed" visible="false" directory="concatenated"/>
138 <filter>global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_pair'</filter> 175 <filter>(global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_pair') or (global_condition['input_type'] == 'simple_collections' and global_condition['collections_condition']['collection_cat_type'] == 'two_collections')</filter>
139 </collection> 176 </collection>
140 </outputs> 177 </outputs>
141 <tests> 178 <tests>
142 <!-- Single files concatenation --> 179 <!-- Single files concatenation -->
143 <test> <!-- Test 2 single files concatenation with no other option --> 180 <test> <!-- Test 2 single files concatenation with no other option -->
239 <output_collection name="paired_output" type="paired" > 276 <output_collection name="paired_output" type="paired" >
240 <element name="forward" file="f.fastq"/> 277 <element name="forward" file="f.fastq"/>
241 <element name="reverse" file="r.fastq"/> 278 <element name="reverse" file="r.fastq"/>
242 </output_collection> 279 </output_collection>
243 </test> 280 </test>
281 <test> <!-- Test 2 collections concatenation -->
282 <param name="input_type" value="simple_collections" />
283 <param name="collection_cat_type" value="two_collections"/>
284 <param name="input_1">
285 <collection type="list">
286 <element name="2" value="2_f.fastq"/>
287 <element name="3" value="3_f.fastq"/>
288 <element name="4" value="4_f.fastq"/>
289 </collection>
290 </param>
291 <param name="input_2">
292 <collection type="list">
293 <element name="2" value="2_r.fastq"/>
294 <element name="3" value="3_r.fastq"/>
295 <element name="4" value="4_r.fastq"/>
296 </collection>
297 </param>
298 <param name="dataset_names" value="No" />
299 <param name="headers" value="0" />
300 <output_collection name="list_output" type="list" count="3" >
301 <element name="2" file="2.fastq"/>
302 <element name="3" file="3.fastq"/>
303 <element name="4" file="4.fastq"/>
304 </output_collection>
305 </test>
306 <test> <!-- Test 2 collections concatenation with other options-->
307 <param name="input_type" value="simple_collections" />
308 <param name="collection_cat_type" value="two_collections"/>
309 <param name="input_1">
310 <collection type="list">
311 <element name="1_f.fastq" value="1_f.fastq.gz"/>
312 </collection>
313 </param>
314 <param name="input_2">
315 <collection type="list">
316 <element name="1_r.fastq" value="1_r.fastq.gz"/>
317 </collection>
318 </param>
319 <param name="dataset_names" value="Yes" />
320 <param name="headers" value="4" />
321 <output_collection name="list_output" type="list" count="1" >
322 <element name="1_f.fastq_1_r.fastq" file="1_options.fastq.gz" decompress="True"/>
323 </output_collection>
324 </test>
244 </tests> 325 </tests>
245 <help> 326 <help>
246 327
247 .. class:: warningmark 328 .. class:: warningmark
248 329
249 **WARNING:** This tool does not check if the datasets being concatenated are in the same format. 330 **WARNING:** This tool does not check if the datasets being concatenated are in the same format.
250 331
251 **WARNING:** The paired collection operations do not handle gzipped files. 332 **WARNING:** The paired collection operations do not handle gzipped files.
252 333
334 **WARNING:** When concatenating 2 collections, make sure the first collection is the one with the most items.
335
253 ----- 336 -----
254 337
255 **What it does** 338 **What it does**
256 339
257 Concatenates datasets and paired collections with multiple options: 340 Concatenates datasets and paired collections with multiple options:
258 341
259 - It's possible select either a concatenation by strand, by pair or a whole collection concatenation, when the input is a paired collection. 342 - When the input is a paired collection:
343
344 - concatenation by strand : forward and reverse datasets are concatenated separately and a list with a single forward - reverse dataset pair is returned
345
346 - concatenation by pair : forward - reverse dataset pairs are concatenated and a simple dataset collection is returned
347
348 - whole collection concatenation : all datasets in the collection are concatenated and a single dataset is returned
349
350 - When the inputs are 2 collections: datasets are concatenated in a pairwise combination and a single dataset collection is returned
260 351
261 - Skipping lines before concatenation to avoid headers 352 - Skipping lines before concatenation to avoid headers
262 353
263 - Add the name of the concatenated files as separator 354 - Add the name of the concatenated files as separator
264 355
292 chr2 100000030 200000955 P 0 + 383 chr2 100000030 200000955 P 0 +
293 chr2 100000015 200000999 Q 0 + 384 chr2 100000015 200000999 Q 0 +
294 385
295 ----- 386 -----
296 387
388 **2 Collections concatenation**
389
390 1st collection::
391
392 a
393 b
394 c
395 d
396
397 2nd collection::
398
399 1
400 2
401 3
402 4
403
404 Concatenation result::
405
406 A single collection containing:
407
408 a concatenated with 1
409 b concatenated with 2
410 c concatenated with 3
411 d concatenated with 4
412
413 -----
414
297 **Paired collection concatenation example** 415 **Paired collection concatenation example**
298 416
299 1st pair:: 417 1st pair::
300 418
301 forward - reverse 419 forward - reverse
302 420
303 2nd pair:: 421 2nd pair::
304 422
305 forward - reverse 423 forward - reverse
306 424
307 Concatenation by strand:: 425 - Concatenation by strand::
308 426
309 concatenates: 427 concatenates:
310 428
311 1st forward + 2nd forward 429 1st forward + 2nd forward
312 1st reverse + 2nd reverse 430 1st reverse + 2nd reverse
313 431
314 outputs: 432 outputs:
315 433
316 1 pair 434 1 pair
317 435
318 Concatenation by pair:: 436 - Concatenation by pair::
319 437
320 concatenates: 438 concatenates:
321 439
322 1st forward + 1st reverse 440 1st forward + 1st reverse
323 2nd forward + 2nd reverse 441 2nd forward + 2nd reverse
324 442
325 outputs: 443 outputs:
326 444
327 2 datasets 445 2 datasets
328 446
329 Concatenate all:: 447 - Concatenate all::
330 448
331 concatenates: 449 concatenates:
332 450
333 1st forward + 1st reverse + 2nd forward + 2nd reverse 451 1st forward + 1st reverse + 2nd forward + 2nd reverse
334 452