# HG changeset patch # User artbio # Date 1561363373 14400 # Node ID 4554fa330d3d7e3a7c6dd132f06f4fb3fa78e6df # Parent 99a5ed06b86c460cf14ed491a9e43d3e3cac00b1 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/concat_multi_datasets commit 4df03fd2d6fbf17a451256c0fb9d30932fc9d637 diff -r 99a5ed06b86c -r 4554fa330d3d catWrapper.xml --- a/catWrapper.xml Mon Jun 24 03:58:52 2019 -0400 +++ b/catWrapper.xml Mon Jun 24 04:02:53 2019 -0400 @@ -1,4 +1,4 @@ - + tail-to-head by specifying how > '$out_file1' && + #else: + $concat_command '$file' >> '$out_file1' && + #end if + printf "Done\n" && #end for - > '$out_file1' + sleep 1 #else: #for $file in $global_condition.inputs - #if $file.ext[-2:] == "gz": + printf "${file.element_identifier}..." && + #if $file.ext[-2:] == "gz" and $headers != 0: printf "# ${file.element_identifier}\n" | gzip -c >> '$out_file1' && gzip -dc "$file" | $concat_command |gzip -c >> '$out_file1' && #else: printf "# ${file.element_identifier}\n" >> '$out_file1' && $concat_command "$file" >> '$out_file1' && #end if + printf "Done\n" && #end for sleep 1 #end if #else if $global_condition.input_type == "simple_collections": - #if $global_condition.collections_condition.collection_cat_type == "two_collections": mkdir concatenated && #if $dataset_names == "No": - #for $x, $y in zip($global_condition.collections_condition.input_1, $global_condition.collections_condition.input_2) - $concat_command '$x' '$y' > concatenated/'${x.element_identifier}.listed.${x.ext}.listed' && + #for $x, $y in zip($global_condition.input_1, $global_condition.input_2): + printf "${x.element_identifier} and ${y.element_identifier}..." 
&& + #if $x.ext[-2:] == "gz" and $headers != 0: + gzip -dc '$x' | $concat_command | gzip -c > concatenated/'${x.element_identifier}.listed.${x.ext}.listed' && + gzip -dc '$y' | $concat_command | gzip -c >> concatenated/'${x.element_identifier}.listed.${x.ext}.listed' && + #else: + $concat_command '$x' '$y' > concatenated/'${x.element_identifier}.listed.${x.ext}.listed' && + #end if + printf "Done\n" && #end for sleep 1 #else: - #for $x, $y in zip($global_condition.collections_condition.input_1, $global_condition.collections_condition.input_2) - #if $x.ext[-2:] == "gz": + #for $x, $y in zip($global_condition.input_1, $global_condition.input_2) + printf "${x.element_identifier} and ${y.element_identifier}..." && + #if $x.ext[-2:] == "gz" and $headers != 0: printf "# ${x.element_identifier}\n" | gzip -c > concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && gzip -dc '$x' | $concat_command | gzip -c >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && printf "# ${y.element_identifier}\n" | gzip -c >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && @@ -46,85 +61,162 @@ printf "# ${y.element_identifier}\n" >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && $concat_command '$y' >> concatenated/'${x.element_identifier}_${y.element_identifier}.listed.${x.ext}.listed' && #end if + printf "Done\n" && #end for sleep 1 #end if - #end if #else if $global_condition.input_type == "paired_collection": #if $global_condition.paired_cat_type == "by_strand": + mkdir concatenated && #if $dataset_names == "No": #for $file in $global_condition.inputs - $concat_command - $file['forward'] - >> '$forward' && - $concat_command - $file['reverse'] - >> '$reverse' && + printf "${file.element_identifier}- forward and reverse..." 
&& + #if $file['forward'].ext[-2:] == "gz" and $headers != 0: + gzip -dc $file['forward'] | $concat_command | gzip -c >> concatenated/forward.listed.${file['forward'].ext}.listed && + gzip -dc $file['reverse'] | $concat_command | gzip -c >> concatenated/reverse.listed.${file['reverse'].ext}.listed && + #else: + $concat_command $file['forward'] >> concatenated/forward.listed.${file['forward'].ext}.listed && + $concat_command $file['reverse'] >> concatenated/reverse.listed.${file['reverse'].ext}.listed && + #end if + printf "Done\n" && #end for sleep 1 #else: - #for $file in $global_condition.inputs.keys() - printf "# ${file}_forward\n" >> '$forward' && - $concat_command - $global_condition.inputs[$file]['forward'] - >> '$forward' && - printf "# ${file}_reverse\n" >> '$reverse' && - $concat_command - $global_condition.inputs[$file]['reverse'] - >> '$reverse' && + #for $file in $global_condition.inputs.keys(): + printf "$file - forward and reverse..." && + #if $global_condition.inputs[$file]['forward'].ext[-2:] == "gz" and $headers != 0: + printf "# ${file}_forward\n" | gzip -c >> concatenated/forward.listed.${global_condition.inputs[$file]['forward'].ext}.listed && + gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c >> concatenated/forward.listed.${global_condition.inputs[$file]['forward'].ext}.listed && + printf "# ${file}_reverse\n" | gzip -c >> concatenated/reverse.listed.${global_condition.inputs[$file]['reverse'].ext}.listed && + gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c >> concatenated/reverse.listed.${global_condition.inputs[$file]['reverse'].ext}.listed && + #else: + printf "# ${file}_forward\n" >> concatenated/forward.listed.${global_condition.inputs[$file]['forward'].ext}.listed && + $concat_command $global_condition.inputs[$file]['forward'] >> concatenated/forward.listed.${global_condition.inputs[$file]['forward'].ext}.listed && + printf "# ${file}_reverse\n" >> 
concatenated/reverse.listed.${global_condition.inputs[$file]['reverse'].ext}.listed && + $concat_command $global_condition.inputs[$file]['reverse'] >> concatenated/reverse.listed.${global_condition.inputs[$file]['reverse'].ext}.listed && + #end if + printf "Done\n" && #end for sleep 1 #end if #else if $global_condition.paired_cat_type == "by_pair": mkdir concatenated && #if $dataset_names == "No": - #for $file in $global_condition.inputs.keys() - $concat_command - $global_condition.inputs[$file]['forward'] - > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - $concat_command - $global_condition.inputs[$file]['reverse'] - >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + #for $file in $global_condition.inputs.keys(): + printf "$file - forward and reverse..." && + #if $global_condition.inputs[$file]['forward'].ext[-2:] == "gz" and $headers != 0: + gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c + > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c + >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + #else: + $concat_command $global_condition.inputs[$file]['forward'] + > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + $concat_command $global_condition.inputs[$file]['reverse'] + >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + #end if + printf "Done\n" && #end for sleep 1 #else: - #for $file in $global_condition.inputs.keys() - printf "# ${file}_forward\n" > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - $concat_command - $global_condition.inputs[$file]['forward'] - >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - 
printf "# ${file}_reverse\n" >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && - $concat_command - $global_condition.inputs[$file]['reverse'] - >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + #for $file in $global_condition.inputs.keys(): + printf "$file - forward and reverse..." && + #if $global_condition.inputs[$file]['reverse'].ext[-2:] == "gz" and $headers != 0: + printf "# ${file}_forward\n" | gzip -c > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c + >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + printf "# ${file}_reverse\n" | gzip -c >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c + >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + #else: + printf "# ${file}_forward\n" > concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + $concat_command $global_condition.inputs[$file]['forward'] + >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + printf "# ${file}_reverse\n" >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + $concat_command $global_condition.inputs[$file]['reverse'] + >> concatenated/'${file}.listed.${global_condition.inputs[$file]['reverse'].ext}.listed' && + #end if + printf "Done\n" && #end for sleep 1 #end if #else if $global_condition.paired_cat_type == "all": + mkdir concatenated && + #set $base_name=$global_condition.inputs.element_identifier + #set $extention=$global_condition.inputs[$global_condition.inputs.keys()[0]]['forward'].ext #if $dataset_names == "No": - #for $file in 
$global_condition.inputs.keys() - $concat_command - $global_condition.inputs[$file]['forward'] - >> $out_file1 && - $concat_command - $global_condition.inputs[$file]['reverse'] - >> $out_file1 && + #for $file in $global_condition.inputs.keys(): + printf "$file - forward and reverse..." && + #if $global_condition.inputs[$file]['forward'].ext[-2:] == "gz" and $headers != 0: + gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c >> '$paired_out_file' && + gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c >> '$paired_out_file' && + #else: + $concat_command + $global_condition.inputs[$file]['forward'] + >> '$paired_out_file' && + $concat_command + $global_condition.inputs[$file]['reverse'] + >> '$paired_out_file' && + #end if + printf "Done\n" && #end for sleep 1 #else: - #for $file in $global_condition.inputs.keys() - printf "# ${file}_forward\n" > $out_file1 && - $concat_command - $global_condition.inputs[$file]['forward'] - >> $out_file1 && - printf "# ${file}_reverse\n" >> $out_file1 && - $concat_command - $global_condition.inputs[$file]['reverse'] - >> $out_file1 && + #for $file in $global_condition.inputs.keys(): + printf "$file - forward and reverse..." 
&& + #if $global_condition.inputs[$file]['forward'].ext[-2:] == "gz" and $headers != 0: + printf "# ${file}_forward\n" | gzip -c >> '$paired_out_file' && + gzip -dc $global_condition.inputs[$file]['forward'] | $concat_command | gzip -c >> '$paired_out_file' && + printf "# ${file}_reverse\n" | gzip -c >> '$paired_out_file' && + gzip -dc $global_condition.inputs[$file]['reverse'] | $concat_command | gzip -c >> '$paired_out_file' && + #else: + printf "# ${file}_forward\n" >> '$paired_out_file' && + $concat_command + $global_condition.inputs[$file]['forward'] + >> '$paired_out_file' && + printf "# ${file}_reverse\n" >> '$paired_out_file' && + $concat_command + $global_condition.inputs[$file]['reverse'] + >> '$paired_out_file' && + #end if + printf "Done\n" && #end for sleep 1 #end if #end if + #else if $global_condition.input_type == "nested_collection": + mkdir concatenated && + #if $dataset_names == "No": + #for $sub_list in $global_condition.inputs: + #set $file_base_name=$sub_list.element_identifier + #for $sub_list_element in $sub_list: + printf "${file_base_name} - ${sub_list_element.element_identifier}..." && + #if $sub_list_element.ext[-2:] == "gz" and $headers != 0: + gzip -dc ${sub_list_element} | $concat_command | gzip -c >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && + #else: + $concat_command ${sub_list_element} >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && + #end if + printf "Done\n" && + #end for + #end for + sleep 1 + #else: + #for $sub_list in $global_condition.inputs: + #set $file_base_name=$sub_list.element_identifier + #for $sub_list_element in $sub_list: + printf "${file_base_name} - ${sub_list_element.element_identifier}..." 
&& + #if $sub_list_element.ext[-2:] == "gz" and $headers != 0: + printf "# ${sub_list_element.element_identifier}\n" | gzip -c >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && + gzip -dc ${sub_list_element} | $concat_command | gzip -c >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && + #else: + printf "# ${sub_list_element.element_identifier}\n" >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && + $concat_command ${sub_list_element} >> concatenated/'${file_base_name}.listed.${sub_list_element.ext}.listed' && + #end if + printf "Done\n" && + #end for + #end for + sleep 1 + #end if #end if ]]> @@ -132,8 +224,9 @@ - + + @@ -147,15 +240,11 @@ - - - - - - - - - + + + + + @@ -163,16 +252,18 @@ - global_condition['input_type'] == 'singles' or (global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'all') + global_condition['input_type'] == 'singles' + + + global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'all' - - + global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_strand' - - (global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_pair') or (global_condition['input_type'] == 'simple_collections' and global_condition['collections_condition']['collection_cat_type'] == 'two_collections') + + (global_condition['input_type'] == 'paired_collection' and global_condition['paired_cat_type'] == 'by_pair') or (global_condition['input_type'] == 'simple_collections') or (global_condition['input_type'] == 'nested_collection') @@ -322,6 +413,48 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -329,9 +462,9 @@ **WARNING:** This tool does not check if the datasets being concatenated are in the same format. -**WARNING:** The paired collection operations do not handle gziped files. 
+**WARNING:** When concatenating 2 collections make sure the first collection is the one with the most items. -**WARNING:** When concatenating 2 collections make sure the first collection is the one with the most items. +**WARNING:** This tool can't handle nested collections deeper than list:list. ----- @@ -349,6 +482,8 @@ - When the inputs are 2 collections: datasets are concatenated in a pairwise combination and a single dataset collection is returned + - When the input is a nested collection: datasets in each sub-collection are concatenated and a simple dataset collection is returned + - Skipping lines before concatenation to avoid headers - Add the name of the concatenated files as separator @@ -456,6 +591,32 @@ ----- +**Nested collection concatenation example** + +Nested collection: + + - Experiment + + - Sample_1 + + - Sample_1_file_1 + - Sample_1_file_2 + + - Sample_2 + + - Sample_2_file_1 + - Sample_2_file_2 + - Sample_2_file_3 + +Concatenation result:: + + A single collection containing: + + - Sample_1: (Sample_1_file_1 + Sample_1_file_2) + - Sample_2: (Sample_2_file_1 + Sample_2_file_2 + Sample_2_file_3) + +----- + **When selecting "Include dataset names" when concatenating files**: 1rst file name="first_tabular"::