Mercurial > repos > iuc > datamash_ops
changeset 3:419027d822d6 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/datamash commit 206ac70388ea11b168366f1e30bb44157e371c6e"
author | iuc |
---|---|
date | Sun, 10 Apr 2022 11:41:19 +0000 |
parents | 562f3c677828 |
children | 746e8e4bf929 |
files | datamash-ops.xml macros.xml test-data/group_compute_input.csv |
diffstat | 3 files changed, 152 insertions(+), 20 deletions(-) [+] |
line wrap: on
line diff
--- a/datamash-ops.xml Fri Sep 07 10:58:27 2018 -0400 +++ b/datamash-ops.xml Sun Apr 10 11:41:19 2022 +0000 @@ -1,5 +1,4 @@ -<?xml version="1.0"?> -<tool id="datamash_ops" name="Datamash" version="@WRAPPER_VERSION@"> +<tool id="datamash_ops" name="Datamash" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> <description>(operations on tabular data)</description> <macros> <import>macros.xml</import> @@ -14,8 +13,9 @@ $need_sort $print_full_line $ignore_case - #if str($grouping).strip() - --group '$grouping' + @FIELD_SEPARATOR@ + #if str($grouping) != '' + --group '$grouping' #end if #for $oper in $operations ${oper.op_name} @@ -25,15 +25,23 @@ ]]> </command> <expand macro="inputs_outputs"> - <param help="Example: to group by the first and fourth fields, use 1,4." label="Group by fields" name="grouping" type="text"> - <validator message="Invalid value in field. Allowed values are 0-9, space, comma." type="regex">^[0-9, ]*$</validator> + <param argument="--group" name="grouping" type="text" label="Group by fields" help="Group consecutive rows with equal values in the chosen fields. If no columns are specified, each operation is performed in the entire input file. Comma separated list of column indices, e.g. 1,5"> + <sanitizer invalid_char=""> + <valid initial="string.digits"> + <add value="," /> + </valid> + <mapping initial="none"> + <add source=" " target=""/> + </mapping> + </sanitizer> + <validator message="Invalid value in field. Allowed is a comma separated list of integer values or the empty string" type="regex">(^$)|(^\s*\d+\s*(,\s*\d+\s*)*$)</validator> </param> - <param falsevalue="" help="--header-in" label="Input file has a header line" name="header_in" truevalue="--header-in" type="boolean" /> - <param falsevalue="" help="--header-out" label="Print header line" name="header_out" truevalue="--header-out" type="boolean" /> - <param falsevalue="" help="--sort" label="Sort input" name="need_sort" truevalue="--sort" type="boolean" /> - <param falsevalue="" help="--full" label="Print all fields from input file" name="print_full_line" truevalue="--full" type="boolean" /> - <param falsevalue="" help="--ignore-case" label="Ignore case when grouping" name="ignore_case" truevalue="--ignore-case" type="boolean" /> - <repeat default="1" min="1" name="operations" title="Operation to perform on each group"> + <param argument="--sort" name="need_sort" type="boolean" truevalue="--sort" falsevalue="" label="Sort input" help="Input file must be sorted by the grouping columns. Enable this option to automatically sort the input."/> + <param argument="--header-in" type="boolean" truevalue="--header-in" falsevalue="" label="Input file has a header line" /> + <param argument="--header-out" type="boolean" truevalue="--header-out" falsevalue="" label="Print header line" /> + <param argument="--full" name="print_full_line" type="boolean" truevalue="--full" falsevalue="" label="Print all fields from input file" /> + <param argument="--ignore-case" type="boolean" truevalue="--ignore-case" falsevalue="" label="Ignore case when grouping" /> + <repeat name="operations" default="1" min="1" title="Operation to perform on each group"> <param name="op_name" type="select" label="Type"> <option value="count">count</option> <option value="sum">sum</option> @@ -82,6 +90,39 @@ </repeat> <output file="group_compute_output.txt" name="out_file" ftype="tabular" /> </test> + <test> + <param name="in_file" value="group_compute_input.txt" ftype="tsv" /> + <param name="grouping" value="2" /> + <param name="header_in" value="true" /> + <param name="header_out" value="true" /> + <param name="need_sort" value="true" /> + <param name="print_full_line" value="false" /> + <param name="ignore_case" value="false" /> + <repeat name="operations"> + <param name="op_name" value="sum" /> + <param name="op_column" value="3" /> + </repeat> + <output file="group_compute_output.txt" name="out_file" ftype="tsv" /> + </test> + <test> + <param name="in_file" value="group_compute_input.csv" ftype="csv" /> + <param name="grouping" value="2" /> + <param name="header_in" value="true" /> + <param name="header_out" value="true" /> + <param name="need_sort" value="true" /> + <param name="print_full_line" value="false" /> + <param name="ignore_case" value="false" /> + <repeat name="operations"> + <param name="op_name" value="sum" /> + <param name="op_column" value="3" /> + </repeat> + <output name="out_file" ftype="csv"> + <assert_contents> + <has_n_lines n="7"/> + <has_line line="Arts,1310"/> + </assert_contents> + </output> + </test> </tests> <help> <![CDATA[ @@ -99,14 +140,14 @@ Name Major Score Bryan Arts 68 + Gabriel Health-Medicine 100 Isaiah Arts 80 - Gabriel Health-Medicine 100 Tysza Business 92 Zackery Engineering 54 ... ... -- Grouping the input by the second column (*Major*), and performing operations **mean** and **sample standard deviation** on the third column (*Score*), gives:: +- Grouping the input by the second column (*Major*), sorting the input, and performing operations **mean** and **sample standard deviation** on the third column (*Score*), gives:: GroupBy(Major) mean(Score) sstdev(Score) Arts 68.9474 10.4215 @@ -116,6 +157,8 @@ Life-Sciences 55.3333 20.606 Social-Sciences 60.2667 17.2273 +Note that input needs sorting here, since the column used for grouping (*Major*) is not sorted. + This sample file is available at http://www.gnu.org/software/datamash . **Example 2**
--- a/macros.xml Fri Sep 07 10:58:27 2018 -0400 +++ b/macros.xml Sun Apr 10 11:41:19 2022 +0000 @@ -1,19 +1,24 @@ -<?xml version="1.0"?> <macros> - <token name="@WRAPPER_VERSION@">1.1.0</token> + <token name="@TOOL_VERSION@">1.1.0</token> + <token name="@VERSION_SUFFIX@">1</token> + <token name="@PROFILE@">21.01</token> <xml name="inputs_outputs"> <inputs> - <param format="tabular" help="" label="Input tabular dataset" name="in_file" type="data" /> + <param name="in_file" type="data" format="tabular,csv,tsv" label="Input tabular dataset" help="" /> <yield /> </inputs> <outputs> - <data format="tabular" name="out_file" label="${tool.name} on ${on_string}" /> + <data name="out_file" format_source="in_file" label="${tool.name} on ${on_string}" /> </outputs> </xml> - + <token name="@FIELD_SEPARATOR@"><![CDATA[ + #if $in_file.ext == 'csv' + -t , + #end if + ]]></token> <xml name="requirements"> <requirements> - <requirement type="package" version="@WRAPPER_VERSION@">datamash</requirement> + <requirement type="package" version="@TOOL_VERSION@">datamash</requirement> </requirements> </xml> <xml name="stdio">
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/group_compute_input.csv Sun Apr 10 11:41:19 2022 +0000 @@ -0,0 +1,84 @@ +Name,Major,Score +Ignatius,Engineering,83 +Austin,Life-Sciences,91 +Zackery,Engineering,54 +Marques,Arts,58 +Darren,Business,94 +Darius,Social-Sciences,51 +Thanh,Engineering,53 +Joe'Quann,Engineering,75 +Bryan,Arts,68 +Devin,Engineering,92 +Joseph,Social-Sciences,61 +Joshua,Life-Sciences,14 +Ja'Won,Social-Sciences,37 +Tyreque,Arts,74 +Sage,Arts,55 +Antonio,Engineering,88 +Michael,Engineering,39 +Randy,Social-Sciences,68 +Dilan,Health-Medicine,84 +Omar,Engineering,99 +Zachary,Arts,80 +Faison,Engineering,47 +Angel,Health-Medicine,100 +Gabriel,Health-Medicine,100 +John,Life-Sciences,70 +Leonard,Business,87 +Juan,Business,79 +Jonathan,Health-Medicine,100 +Christopher,Life-Sciences,59 +Brandon,Life-Sciences,72 +D'Angelo,Health-Medicine,90 +Justin,Social-Sciences,90 +Israel,Health-Medicine,81 +William,Arts,46 +David,Social-Sciences,69 +Drake,Social-Sciences,59 +Drake,Social-Sciences,76 +Nathan,Arts,71 +Trevon,Arts,74 +Aaron,Business,83 +Daniel,Health-Medicine,91 +Kevin,Health-Medicine,100 +Antonio,Engineering,56 +Donovan,Arts,75 +Kerris,Business,82 +Andre,Health-Medicine,72 +Dakota,Business,83 +Aaron,Life-Sciences,58 +Walter,Arts,75 +Isaiah,Arts,80 +Christian,Life-Sciences,67 +Dalton,Health-Medicine,100 +Jesse,Social-Sciences,32 +Diego,Health-Medicine,82 +Nathen,Life-Sciences,46 +Anthony,Life-Sciences,32 +Christian,Business,88 +David,Business,92 +Avery,Engineering,51 +Paul,Arts,63 +Derek,Arts,60 +Levi,Arts,76 +Lance,Social-Sciences,65 +Sonny,Engineering,50 +Shawn,Arts,65 +Leonardo,Engineering,78 +Yeng,Life-Sciences,39 +Leroy,Social-Sciences,74 +Gurnam,Life-Sciences,66 +Fernando,Arts,78 +Williams,Social-Sciences,62 +Roberto,Arts,65 +Teriuse,Business,94 +Nathaniel,Arts,88 +Chase,Social-Sciences,27 +Caleb,Business,87 +Tysza,Business,92 +Nico,Arts,59 +Manuel,Social-Sciences,61 +Patrick,Health-Medicine,92 +Peter,Health-Medicine,86 +Allen,Life-Sciences,50 +Joel,Social-Sciences,72