Mercurial > repos > artbio > concatenate_multiple_datasets
changeset 3:62aebaf6cfa0 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/concat_multi_datasets commit 64e9762ab35b04bb0d151e441baa2fae8bf2cb4a
author | artbio |
---|---|
date | Fri, 10 May 2019 10:15:02 -0400 |
parents | 1fe4d165ac0e |
children | 7afc0515a307 |
files | catWrapper.xml |
diffstat | 1 files changed, 125 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/catWrapper.xml Mon Apr 15 18:52:43 2019 -0400 +++ b/catWrapper.xml Fri May 10 10:15:02 2019 -0400 @@ -1,4 +1,4 @@ -<tool id="cat_multi_datasets" name="Concatenate multiple datasets" version="1.1.0"> +<tool id="cat_multi_datasets" name="Concatenate multiple datasets" version="1.2.0"> <description>tail-to-head by specifying how</description> <command><![CDATA[ #if $headers == 0: @@ -25,6 +25,31 @@ #end for sleep 1 #end if + #else if $global_condition.input_type == "simple_collections": + #if $global_condition.collections_condition.collection_cat_type == "two_collections": + mkdir concatenated && + #if $dataset_names == "No": + #for $x, $y in zip($global_condition.collections_condition.input_1, $global_condition.collections_condition.input_2) + $concat_command '$x' '$y' > concatenated/'${x.element_identifier}.listed.${x.ext}.listed' && + #end for + sleep 1 + #else: + #for $x, $y in zip($global_condition.collections_condition.input_1, $global_condition.collections_condition.input_2) + #if $x.ext[-2:] == "gz": + printf "# ${x.element_identifier}\n" | gzip -c > concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && + gzip -dc '$x' | $concat_command | gzip -c >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && + printf "# ${y.element_identifier}\n" | gzip -c >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && + gzip -dc '$y' | $concat_command | gzip -c >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && + #else: + printf "# ${x.element_identifier}\n" > concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && + $concat_command '$x'>> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && + printf "# ${y.element_identifier}\n" >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && + $concat_command '$y' >> 
concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && + #end if + #end for + sleep 1 + #end if + #end if #else if $global_condition.input_type == "paired_collection": #if $global_condition.paired_cat_type == "by_strand": #if $dataset_names == "No": @@ -107,19 +132,31 @@ <conditional name="global_condition"> <param name="input_type" type="select" label="What type of data do you wish to concatenate?" help="Depending on the type of input selected the concatenation options will differ"> <option value="singles">Single datasets</option> + <option value="simple_collections">Collections</option> <option value="paired_collection">Paired collection</option> </param> <when value="singles"> <param name="inputs" type="data" label="Concatenate Datasets" multiple="True" help="All inputed datasets will be concatenated tail-to-head."/> </when> <when value="paired_collection"> - <param name="inputs" type="data_collection" collection_type="list:paired" label="Input paired collections to concatenate"/> + <param name="inputs" type="data_collection" collection_type="list:paired" label="Input paired collection to concatenate"/> <param name="paired_cat_type" type="select" label="What type of concatenation do you wish to perform?"> <option value="by_strand">Concatenate all datsets of same strand (outputs a single pair of datasets)</option> <option value="by_pair">Concatenate pairs of datasets (outputs an unpaired collection of datasets)</option> <option value="all">Concatenate all datasets into a single file regardless of strand (outputs a single file)</option> </param> </when> + <when value="simple_collections"> + <conditional name="collections_condition"> + <param name="collection_cat_type" type="select" label="What type of concatenation do you wish to perform?"> + <option value="two_collections">Concatenate datasets of 2 collections (outputs a simple collection)</option> + </param> + <when value="two_collections"> + <param name="input_1" 
type="data_collection" collection_type="list" label="Input first collection" help="The first collection contains the datasets that will be written first in the concatenated file" /> + <param name="input_2" type="data_collection" collection_type="list" label="Input second collection" help="The second collection contains the datasets that will be written last in the concatenated file" /> + </when> + </conditional> + </when> </conditional> <param name="dataset_names" type="boolean" label="Include dataset names?" truevalue="Yes" falsevalue="No" checked="false" help="If 'Yes' is selected '#name of dataset' will be added when concatenating."/> <param name="headers" type="integer" label="Number of lines to skip at the beginning of each concatenation:" value="0" help="This paremeter exists so as to not concatenate comments or headers contained at the start of the files."/> @@ -135,7 +172,7 @@ </collection> <collection name="list_output" type="list" label="Concatenation by pairs"> <discover_datasets pattern="(?P<name>.*)\.listed\.(?P<ext>.*)\.listed" visible="false" directory="concatenated"/> - <filter>global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_pair'</filter> + <filter>(global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_pair') or (global_condition['input_type'] == 'simple_collections' and global_condition['collections_condition']['collection_cat_type'] == 'two_collections')</filter> </collection> </outputs> <tests> @@ -241,6 +278,50 @@ <element name="reverse" file="r.fastq"/> </output_collection> </test> + <test> <!-- Test 2 collections concatenation --> + <param name="input_type" value="simple_collections" /> + <param name="collection_cat_type" value="two_collections"/> + <param name="input_1"> + <collection type="list"> + <element name="2" value="2_f.fastq"/> + <element name="3" value="3_f.fastq"/> + <element name="4" value="4_f.fastq"/> + </collection> + </param> + 
<param name="input_2"> + <collection type="list"> + <element name="2" value="2_r.fastq"/> + <element name="3" value="3_r.fastq"/> + <element name="4" value="4_r.fastq"/> + </collection> + </param> + <param name="dataset_names" value="No" /> + <param name="headers" value="0" /> + <output_collection name="list_output" type="list" count="3" > + <element name="2" file="2.fastq"/> + <element name="3" file="3.fastq"/> + <element name="4" file="4.fastq"/> + </output_collection> + </test> + <test> <!-- Test 2 collections concatenation with other options--> + <param name="input_type" value="simple_collections" /> + <param name="collection_cat_type" value="two_collections"/> + <param name="input_1"> + <collection type="list"> + <element name="1_f.fastq" value="1_f.fastq.gz"/> + </collection> + </param> + <param name="input_2"> + <collection type="list"> + <element name="1_r.fastq" value="1_r.fastq.gz"/> + </collection> + </param> + <param name="dataset_names" value="Yes" /> + <param name="headers" value="4" /> + <output_collection name="list_output" type="list" count="1" > + <element name="1_f.fastq_1_r.fastq" file="1_options.fastq.gz" decompress="True"/> + </output_collection> + </test> </tests> <help> @@ -250,13 +331,23 @@ **WARNING:** The paired collection operations do not handle gziped files. +**WARNING:** When concatenating 2 collections make sure the first collection is the one with the most items. + ----- **What it does** Concatenates datasets and paired collections with multiple options: - - It's possible select either a concatenation by strand, by pair or a whole collection concatenation, when the input is a paired collection. 
+ - When the input is a paired collection: + + - concatenation by strand : forward and reverse datasets are concatenated separately and a list with a single forward - reverse dataset pair is returned + + - concatenation by pair : forward - reverse dataset pairs are concatenated and a simple dataset collection is returned + + - whole collection concatenation : all datasets in the collection are concatenated and a single dataset is returned + + - When the inputs are 2 collections: datasets are concatenated in a pairwise combination and a single dataset collection is returned - Skipping lines before concatenation to avoid headers @@ -294,6 +385,33 @@ ----- +**2 Collections concatenation** + +1st collection:: + + a + b + c + d + +2nd collection:: + + 1 + 2 + 3 + 4 + +Concatenation result:: + + A single collection containing: + + a concatenated with 1 + b concatenated with 2 + c concatenated with 3 + d concatenated with 4 + +----- + **Paired collection concatenation example** 1st pair:: @@ -304,7 +422,7 @@ forward - reverse -Concatenation by strand:: +- Concatenation by strand:: concatenates: @@ -315,7 +433,7 @@ 1 pair -Concatenation by pair:: +- Concatenation by pair:: concatenates: @@ -326,7 +444,7 @@ 2 datasets -Concatenate all:: +- Concatenate all:: concatenates: