Mercurial > repos > iuc > datamash_ops

--- a/datamash-ops.xml	Fri Sep 07 10:58:27 2018 -0400
+++ b/datamash-ops.xml	Sun Apr 10 11:41:19 2022 +0000
@@ -1,5 +1,4 @@
-<?xml version="1.0"?>
-<tool id="datamash_ops" name="Datamash" version="@WRAPPER_VERSION@">
+<tool id="datamash_ops" name="Datamash" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
     <description>(operations on tabular data)</description>
     <macros>
         <import>macros.xml</import>
@@ -14,8 +13,9 @@
                 $need_sort
                 $print_full_line
                 $ignore_case
-                #if str($grouping).strip()
-                --group '$grouping'
+                @FIELD_SEPARATOR@
+                #if str($grouping) != ''
+                    --group '$grouping'
                 #end if
                 #for $oper in $operations
                     ${oper.op_name}
@@ -25,15 +25,23 @@
           ]]>
     </command>
     <expand macro="inputs_outputs">
-        <param help="Example: to group by the first and fourth fields, use 1,4." label="Group by fields" name="grouping" type="text">
-            <validator message="Invalid value in field. Allowed values are 0-9, space, comma." type="regex">^[0-9, ]*$</validator>
+        <param argument="--group" name="grouping" type="text" label="Group by fields" help="Group consecutive rows with equal values in the chosen fields. If no columns are specified, each operation is performed on the entire input file. Comma-separated list of column indices, e.g. 1,5">
+            <sanitizer invalid_char="">
+                <valid initial="string.digits">
+                    <add value="," />
+                </valid>
+                <mapping initial="none">
+                    <add source=" " target=""/>
+                </mapping>
+            </sanitizer>
+            <validator message="Invalid value in field. Allowed is a comma separated list of integer values or the empty string" type="regex">(^$)|(^\s*\d+\s*(,\s*\d+\s*)*$)</validator>
         </param>
-        <param falsevalue="" help="--header-in" label="Input file has a header line" name="header_in" truevalue="--header-in" type="boolean" />
-        <param falsevalue="" help="--header-out" label="Print header line" name="header_out" truevalue="--header-out" type="boolean" />
-        <param falsevalue="" help="--sort" label="Sort input" name="need_sort" truevalue="--sort" type="boolean" />
-        <param falsevalue="" help="--full" label="Print all fields from input file" name="print_full_line" truevalue="--full" type="boolean" />
-        <param falsevalue="" help="--ignore-case" label="Ignore case when grouping" name="ignore_case" truevalue="--ignore-case" type="boolean" />
-        <repeat default="1" min="1" name="operations" title="Operation to perform on each group">
+        <param argument="--sort" name="need_sort" type="boolean" truevalue="--sort" falsevalue="" label="Sort input" help="Input file must be sorted by the grouping columns. Enable this option to automatically sort the input."/>
+        <param argument="--header-in" type="boolean" truevalue="--header-in" falsevalue="" label="Input file has a header line" />
+        <param argument="--header-out" type="boolean" truevalue="--header-out" falsevalue="" label="Print header line" />
+        <param argument="--full" name="print_full_line" type="boolean" truevalue="--full" falsevalue="" label="Print all fields from input file" />
+        <param argument="--ignore-case" type="boolean" truevalue="--ignore-case" falsevalue="" label="Ignore case when grouping" />
+        <repeat name="operations" default="1" min="1" title="Operation to perform on each group">
             <param name="op_name" type="select" label="Type">
                 <option value="count">count</option>
                 <option value="sum">sum</option>
@@ -82,6 +90,39 @@
             </repeat>
             <output file="group_compute_output.txt" name="out_file" ftype="tabular" />
         </test>
+        <test>
+            <param name="in_file" value="group_compute_input.txt" ftype="tsv" />
+            <param name="grouping" value="2" />
+            <param name="header_in" value="true" />
+            <param name="header_out" value="true" />
+            <param name="need_sort" value="true" />
+            <param name="print_full_line" value="false" />
+            <param name="ignore_case" value="false" />
+            <repeat name="operations">
+                <param name="op_name" value="sum" />
+                <param name="op_column" value="3" />
+            </repeat>
+            <output file="group_compute_output.txt" name="out_file" ftype="tsv" />
+        </test>
+        <test>
+            <param name="in_file" value="group_compute_input.csv" ftype="csv" />
+            <param name="grouping" value="2" />
+            <param name="header_in" value="true" />
+            <param name="header_out" value="true" />
+            <param name="need_sort" value="true" />
+            <param name="print_full_line" value="false" />
+            <param name="ignore_case" value="false" />
+            <repeat name="operations">
+                <param name="op_name" value="sum" />
+                <param name="op_column" value="3" />
+            </repeat>
+            <output name="out_file" ftype="csv">
+                <assert_contents>
+                    <has_n_lines n="7"/>
+                    <has_line line="Arts,1310"/>
+                </assert_contents>
+            </output>
+        </test>
     </tests>
     <help>
 <![CDATA[
@@ -99,14 +140,14 @@

     Name        Major            Score
     Bryan       Arts             68
+    Gabriel     Health-Medicine  100
     Isaiah      Arts             80
-    Gabriel     Health-Medicine  100
     Tysza       Business         92
     Zackery     Engineering      54
     ...
     ...

-- Grouping the input by the second column (*Major*), and performing operations **mean** and **sample standard deviation** on the third column (*Score*), gives::
+- Grouping the input by the second column (*Major*), sorting the input, and performing operations **mean** and **sample standard deviation** on the third column (*Score*), gives::

     GroupBy(Major)     mean(Score)   sstdev(Score)
     Arts               68.9474       10.4215
@@ -116,6 +157,8 @@
     Life-Sciences      55.3333       20.606
     Social-Sciences    60.2667       17.2273

+Note that the input must be sorted here (enable the **Sort input** option), since its rows are not ordered by the column used for grouping (*Major*).
+
 This sample file is available at http://www.gnu.org/software/datamash .

 **Example 2**
--- a/macros.xml	Fri Sep 07 10:58:27 2018 -0400
+++ b/macros.xml	Sun Apr 10 11:41:19 2022 +0000
@@ -1,19 +1,24 @@
-<?xml version="1.0"?>
 <macros>
-    <token name="@WRAPPER_VERSION@">1.1.0</token>
+    <token name="@TOOL_VERSION@">1.1.0</token>
+    <token name="@VERSION_SUFFIX@">1</token>
+    <token name="@PROFILE@">21.01</token>
     <xml name="inputs_outputs">
         <inputs>
-            <param format="tabular" help="" label="Input tabular dataset" name="in_file" type="data" />
+            <param name="in_file" type="data" format="tabular,csv,tsv" label="Input tabular dataset" help="" />
             <yield />
         </inputs>
         <outputs>
-            <data format="tabular" name="out_file" label="${tool.name} on ${on_string}" />
+            <data name="out_file" format_source="in_file" label="${tool.name} on ${on_string}" />
         </outputs>
     </xml>
-
+    <token name="@FIELD_SEPARATOR@"><![CDATA[
+        #if $in_file.ext == 'csv'
+            -t ,
+        #end if
+    ]]></token>
     <xml name="requirements">
         <requirements>
-            <requirement type="package" version="@WRAPPER_VERSION@">datamash</requirement>
+            <requirement type="package" version="@TOOL_VERSION@">datamash</requirement>
         </requirements>
     </xml>
     <xml name="stdio">
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/group_compute_input.csv	Sun Apr 10 11:41:19 2022 +0000
@@ -0,0 +1,84 @@
+Name,Major,Score
+Ignatius,Engineering,83
+Austin,Life-Sciences,91
+Zackery,Engineering,54
+Marques,Arts,58
+Darren,Business,94
+Darius,Social-Sciences,51
+Thanh,Engineering,53
+Joe'Quann,Engineering,75
+Bryan,Arts,68
+Devin,Engineering,92
+Joseph,Social-Sciences,61
+Joshua,Life-Sciences,14
+Ja'Won,Social-Sciences,37
+Tyreque,Arts,74
+Sage,Arts,55
+Antonio,Engineering,88
+Michael,Engineering,39
+Randy,Social-Sciences,68
+Dilan,Health-Medicine,84
+Omar,Engineering,99
+Zachary,Arts,80
+Faison,Engineering,47
+Angel,Health-Medicine,100
+Gabriel,Health-Medicine,100
+John,Life-Sciences,70
+Leonard,Business,87
+Juan,Business,79
+Jonathan,Health-Medicine,100
+Christopher,Life-Sciences,59
+Brandon,Life-Sciences,72
+D'Angelo,Health-Medicine,90
+Justin,Social-Sciences,90
+Israel,Health-Medicine,81
+William,Arts,46
+David,Social-Sciences,69
+Drake,Social-Sciences,59
+Drake,Social-Sciences,76
+Nathan,Arts,71
+Trevon,Arts,74
+Aaron,Business,83
+Daniel,Health-Medicine,91
+Kevin,Health-Medicine,100
+Antonio,Engineering,56
+Donovan,Arts,75
+Kerris,Business,82
+Andre,Health-Medicine,72
+Dakota,Business,83
+Aaron,Life-Sciences,58
+Walter,Arts,75
+Isaiah,Arts,80
+Christian,Life-Sciences,67
+Dalton,Health-Medicine,100
+Jesse,Social-Sciences,32
+Diego,Health-Medicine,82
+Nathen,Life-Sciences,46
+Anthony,Life-Sciences,32
+Christian,Business,88
+David,Business,92
+Avery,Engineering,51
+Paul,Arts,63
+Derek,Arts,60
+Levi,Arts,76
+Lance,Social-Sciences,65
+Sonny,Engineering,50
+Shawn,Arts,65
+Leonardo,Engineering,78
+Yeng,Life-Sciences,39
+Leroy,Social-Sciences,74
+Gurnam,Life-Sciences,66
+Fernando,Arts,78
+Williams,Social-Sciences,62
+Roberto,Arts,65
+Teriuse,Business,94
+Nathaniel,Arts,88
+Chase,Social-Sciences,27
+Caleb,Business,87
+Tysza,Business,92
+Nico,Arts,59
+Manuel,Social-Sciences,61
+Patrick,Health-Medicine,92
+Peter,Health-Medicine,86
+Allen,Life-Sciences,50
+Joel,Social-Sciences,72