Mercurial > repos > nml > collapse_collections
changeset 4:25136a2b0cfe draft
planemo upload commit 0340e76ceab90331dab96f4a6b8a9b7df5b8c1c2
| author | nml | 
|---|---|
| date | Fri, 10 Mar 2017 16:12:32 -0500 | 
| parents | c0c988378838 | 
| children | 33151a38533a | 
| files | merge.xml test-data/answer2.tsv test-data/answer3.tsv test-data/strain1.tsv test-data/strain2.tsv | 
| diffstat | 5 files changed, 62 insertions(+), 12 deletions(-) [+] | 
line wrap: on
 line diff
```diff
--- a/merge.xml Mon Oct 24 16:23:11 2016 -0400
+++ b/merge.xml Fri Mar 10 16:12:32 2017 -0500
@@ -1,20 +1,44 @@
-<tool id="collapse_dataset" name="Collapse Collection" version="3.0">
- <description>Collapse collection into single dataset in order of the collection</description>
+<tool id="collapse_dataset" name="Collapse Collection" version="4.0">
+ <description>into single dataset in order of the collection</description>
 <command>
 <![CDATA[
- (
+ (
+ #if $one_header:
+ #if $filename.add_name:
+ awk '{if (NR==1) {print "Sample\t"$0}}' "$input_list[0]";
+ #else:
+ awk '{if (NR==1) {print}}' "$input_list[0]";
+ #end if
+ #end if
+
 #for $f in $input_list#
 #if $filename.add_name:
 #if str($filename.place_name) == "same_once":
+ #if $one_header:
+ printf "$f.element_identifier\t"; tail -q -n +2 "$f";
+ #else:
 printf "$f.element_identifier\t"; cat "$f";
+ #end if
 #elif str($filename.place_name) == "same_multiple":
+ #if $one_header:
+ awk '{if (NR!=1) {print "$f.element_identifier\t"$0}}' "$f";
+ #else:
 awk '{print "$f.element_identifier\t"$0}' "$f";
+ #end if
 #elif str($filename.place_name) == "above":
+ #if $one_header:
+ printf "$f.element_identifier\n"; tail -q -n +2 "$f";
+ #else:
 printf "$f.element_identifier\n"; cat "$f";
+ #end if
 #end if
 #else:
- cat "$f" ;
+ #if $one_header:
+ awk '{if (NR!=1) {print}}' "$f";
+ #else:
+ cat "$f" ;
+ #end if
 #end if
 #end for#
@@ -25,8 +49,10 @@
 </command>
 <inputs>
- <param name="input_list" type="data" format="data" label="Collection of files to collapse into single dataset" help="" optional="false" multiple="true" />
+ <param name="input_list" type="data" format="data" label="Collection of files to collapse into single dataset" help="" optional="false" multiple="true" />
+ <param name="one_header" type="boolean" display="checkboxes" label="Keep one header line" help="Combine first line of each file as the header for the final dataset. Useful when same header line is found in all files."/>
 <conditional name="filename">
+ <param name="add_name" type="boolean" display="checkboxes" label="Append File name"/>
 <when value="true">
 <param name="place_name" type="select" label="Where to add dataset name">
@@ -44,17 +70,25 @@
 </outputs>
 <tests>
 <test>
- <param name="input_list">
- <collection type="list">
- <element name="input1" value="input1" />
- <element name="input2" value="input2" />
- </collection>
- </param>
+ <param name="input_list" value="input1,input2"/>
 <output name="output" file="answer.txt"/>
 </test>
+ <test>
+ <param name="input_list" value="strain1.tsv,strain2.tsv"/>
+ <param name="one_header" value="True"/>
+ <param name="add_name" value="True"/>
+ <param name="place_name" value="same_multiple"/>
+ <output name="output" file="answer2.tsv"/>
+ </test>
+ <test>
+ <param name="input_list" value="strain1.tsv,strain2.tsv"/>
+ <param name="one_header" value="True"/>
+ <output name="output" file="answer3.tsv"/>
+ </test>
+
 </tests>
 <help>
- Combines a list collection into a single file dataset with option to include dataset names.
+ Combines a list collection into a single file dataset with option to include dataset names or merge common header line.
 </help>
 <citations>
 </citations>
```
```diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/answer2.tsv Fri Mar 10 16:12:32 2017 -0500
@@ -0,0 +1,5 @@
+Sample seq_name median mean gc% seq_length invalid_bases %_invalid non_zero_bases %_non_zero %_non_zero_corrected
+strain1.tsv mcr_1 52 52.74000 0.49139 1626 0 0.00000 1600 100.00000 100.00000
+strain1.tsv mcr_2 0 1.60905 0.48114 1617 0 0.00000 56 3.51980 3.51980
+strain2.tsv mcr_1 85 85.61500 0.49139 1626 0 0.00000 1600 100.00000 100.00000
+strain2.tsv mcr_2 0 3.05343 0.48114 1617 0 0.00000 66 4.14833 4.14833
```
```diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/answer3.tsv Fri Mar 10 16:12:32 2017 -0500
@@ -0,0 +1,5 @@
+seq_name median mean gc% seq_length invalid_bases %_invalid non_zero_bases %_non_zero %_non_zero_corrected
+mcr_1 52 52.74000 0.49139 1626 0 0.00000 1600 100.00000 100.00000
+mcr_2 0 1.60905 0.48114 1617 0 0.00000 56 3.51980 3.51980
+mcr_1 85 85.61500 0.49139 1626 0 0.00000 1600 100.00000 100.00000
+mcr_2 0 3.05343 0.48114 1617 0 0.00000 66 4.14833 4.14833
```
```diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/strain1.tsv Fri Mar 10 16:12:32 2017 -0500
@@ -0,0 +1,3 @@
+seq_name median mean gc% seq_length invalid_bases %_invalid non_zero_bases %_non_zero %_non_zero_corrected
+mcr_1 52 52.74000 0.49139 1626 0 0.00000 1600 100.00000 100.00000
+mcr_2 0 1.60905 0.48114 1617 0 0.00000 56 3.51980 3.51980
```
```diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/strain2.tsv Fri Mar 10 16:12:32 2017 -0500
@@ -0,0 +1,3 @@
+seq_name median mean gc% seq_length invalid_bases %_invalid non_zero_bases %_non_zero %_non_zero_corrected
+mcr_1 85 85.61500 0.49139 1626 0 0.00000 1600 100.00000 100.00000
+mcr_2 0 3.05343 0.48114 1617 0 0.00000 66 4.14833 4.14833
```
