Mercurial > repos > iuc > datamash_ops
comparison datamash-ops.xml @ 3:419027d822d6 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/datamash commit 206ac70388ea11b168366f1e30bb44157e371c6e"
author | iuc |
---|---|
date | Sun, 10 Apr 2022 11:41:19 +0000 |
parents | 562f3c677828 |
children | 746e8e4bf929 |
comparison
equal
deleted
inserted
replaced
2:562f3c677828 | 3:419027d822d6 |
---|---|
1 <?xml version="1.0"?> | 1 <tool id="datamash_ops" name="Datamash" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> |
2 <tool id="datamash_ops" name="Datamash" version="@WRAPPER_VERSION@"> | |
3 <description>(operations on tabular data)</description> | 2 <description>(operations on tabular data)</description> |
4 <macros> | 3 <macros> |
5 <import>macros.xml</import> | 4 <import>macros.xml</import> |
6 </macros> | 5 </macros> |
7 <expand macro="requirements" /> | 6 <expand macro="requirements" /> |
12 $header_in | 11 $header_in |
13 $header_out | 12 $header_out |
14 $need_sort | 13 $need_sort |
15 $print_full_line | 14 $print_full_line |
16 $ignore_case | 15 $ignore_case |
17 #if str($grouping).strip() | 16 @FIELD_SEPARATOR@ |
18 --group '$grouping' | 17 #if str($grouping) != '' |
18 --group '$grouping' | |
19 #end if | 19 #end if |
20 #for $oper in $operations | 20 #for $oper in $operations |
21 ${oper.op_name} | 21 ${oper.op_name} |
22 ${oper.op_column} | 22 ${oper.op_column} |
23 #end for | 23 #end for |
24 < $in_file > '$out_file' | 24 < $in_file > '$out_file' |
25 ]]> | 25 ]]> |
26 </command> | 26 </command> |
27 <expand macro="inputs_outputs"> | 27 <expand macro="inputs_outputs"> |
28 <param help="Example: to group by the first and fourth fields, use 1,4." label="Group by fields" name="grouping" type="text"> | 28 <param argument="--group" name="grouping" type="text" label="Group by fields" help="Group consecutive rows with equal values in the chosen fields. If no columns are specified, each operation is performed on the entire input file. Comma-separated list of column indices, e.g. 1,5"> |
29 <validator message="Invalid value in field. Allowed values are 0-9, space, comma." type="regex">^[0-9, ]*$</validator> | 29 <sanitizer invalid_char=""> |
30 <valid initial="string.digits"> | |
31 <add value="," /> | |
32 </valid> | |
33 <mapping initial="none"> | |
34 <add source=" " target=""/> | |
35 </mapping> | |
36 </sanitizer> | |
37 <validator message="Invalid value in field. Allowed is a comma separated list of integer values or the empty string" type="regex">(^$)|(^\s*\d+\s*(,\s*\d+\s*)*$)</validator> | |
30 </param> | 38 </param> |
31 <param falsevalue="" help="--header-in" label="Input file has a header line" name="header_in" truevalue="--header-in" type="boolean" /> | 39 <param argument="--sort" name="need_sort" type="boolean" truevalue="--sort" falsevalue="" label="Sort input" help="Input file must be sorted by the grouping columns. Enable this option to automatically sort the input."/> |
32 <param falsevalue="" help="--header-out" label="Print header line" name="header_out" truevalue="--header-out" type="boolean" /> | 40 <param argument="--header-in" type="boolean" truevalue="--header-in" falsevalue="" label="Input file has a header line" /> |
33 <param falsevalue="" help="--sort" label="Sort input" name="need_sort" truevalue="--sort" type="boolean" /> | 41 <param argument="--header-out" type="boolean" truevalue="--header-out" falsevalue="" label="Print header line" /> |
34 <param falsevalue="" help="--full" label="Print all fields from input file" name="print_full_line" truevalue="--full" type="boolean" /> | 42 <param argument="--full" name="print_full_line" type="boolean" truevalue="--full" falsevalue="" label="Print all fields from input file" /> |
35 <param falsevalue="" help="--ignore-case" label="Ignore case when grouping" name="ignore_case" truevalue="--ignore-case" type="boolean" /> | 43 <param argument="--ignore-case" type="boolean" truevalue="--ignore-case" falsevalue="" label="Ignore case when grouping" /> |
36 <repeat default="1" min="1" name="operations" title="Operation to perform on each group"> | 44 <repeat name="operations" default="1" min="1" title="Operation to perform on each group"> |
37 <param name="op_name" type="select" label="Type"> | 45 <param name="op_name" type="select" label="Type"> |
38 <option value="count">count</option> | 46 <option value="count">count</option> |
39 <option value="sum">sum</option> | 47 <option value="sum">sum</option> |
40 <option value="min">minimum</option> | 48 <option value="min">minimum</option> |
41 <option value="max">maximum</option> | 49 <option value="max">maximum</option> |
80 <param name="op_name" value="sum" /> | 88 <param name="op_name" value="sum" /> |
81 <param name="op_column" value="3" /> | 89 <param name="op_column" value="3" /> |
82 </repeat> | 90 </repeat> |
83 <output file="group_compute_output.txt" name="out_file" ftype="tabular" /> | 91 <output file="group_compute_output.txt" name="out_file" ftype="tabular" /> |
84 </test> | 92 </test> |
93 <test> | |
94 <param name="in_file" value="group_compute_input.txt" ftype="tsv" /> | |
95 <param name="grouping" value="2" /> | |
96 <param name="header_in" value="true" /> | |
97 <param name="header_out" value="true" /> | |
98 <param name="need_sort" value="true" /> | |
99 <param name="print_full_line" value="false" /> | |
100 <param name="ignore_case" value="false" /> | |
101 <repeat name="operations"> | |
102 <param name="op_name" value="sum" /> | |
103 <param name="op_column" value="3" /> | |
104 </repeat> | |
105 <output file="group_compute_output.txt" name="out_file" ftype="tsv" /> | |
106 </test> | |
107 <test> | |
108 <param name="in_file" value="group_compute_input.csv" ftype="csv" /> | |
109 <param name="grouping" value="2" /> | |
110 <param name="header_in" value="true" /> | |
111 <param name="header_out" value="true" /> | |
112 <param name="need_sort" value="true" /> | |
113 <param name="print_full_line" value="false" /> | |
114 <param name="ignore_case" value="false" /> | |
115 <repeat name="operations"> | |
116 <param name="op_name" value="sum" /> | |
117 <param name="op_column" value="3" /> | |
118 </repeat> | |
119 <output name="out_file" ftype="csv"> | |
120 <assert_contents> | |
121 <has_n_lines n="7"/> | |
122 <has_line line="Arts,1310"/> | |
123 </assert_contents> | |
124 </output> | |
125 </test> | |
85 </tests> | 126 </tests> |
86 <help> | 127 <help> |
87 <![CDATA[ | 128 <![CDATA[ |
88 @HELP_HEADER@ | 129 @HELP_HEADER@ |
89 | 130 |
97 | 138 |
98 - Find the average score of college students in a statistics course, grouped by their college major. The input file has three fields (Name,Major,Score) and a header line:: | 139 - Find the average score of college students in a statistics course, grouped by their college major. The input file has three fields (Name,Major,Score) and a header line:: |
99 | 140 |
100 Name Major Score | 141 Name Major Score |
101 Bryan Arts 68 | 142 Bryan Arts 68 |
143 Gabriel Health-Medicine 100 | |
102 Isaiah Arts 80 | 144 Isaiah Arts 80 |
103 Gabriel Health-Medicine 100 | |
104 Tysza Business 92 | 145 Tysza Business 92 |
105 Zackery Engineering 54 | 146 Zackery Engineering 54 |
106 ... | 147 ... |
107 ... | 148 ... |
108 | 149 |
109 - Grouping the input by the second column (*Major*), and performing operations **mean** and **sample standard deviation** on the third column (*Score*), gives:: | 150 - Grouping the input by the second column (*Major*), sorting the input, and performing operations **mean** and **sample standard deviation** on the third column (*Score*), gives:: |
110 | 151 |
111 GroupBy(Major) mean(Score) sstdev(Score) | 152 GroupBy(Major) mean(Score) sstdev(Score) |
112 Arts 68.9474 10.4215 | 153 Arts 68.9474 10.4215 |
113 Business 87.3636 5.18214 | 154 Business 87.3636 5.18214 |
114 Engineering 66.5385 19.8814 | 155 Engineering 66.5385 19.8814 |
115 Health-Medicine 90.6154 9.22441 | 156 Health-Medicine 90.6154 9.22441 |
116 Life-Sciences 55.3333 20.606 | 157 Life-Sciences 55.3333 20.606 |
117 Social-Sciences 60.2667 17.2273 | 158 Social-Sciences 60.2667 17.2273 |
159 | |
160 Note that the input needs sorting here, because the column used for grouping (*Major*) is not sorted. | |
118 | 161 |
119 This sample file is available at http://www.gnu.org/software/datamash . | 162 This sample file is available at http://www.gnu.org/software/datamash . |
120 | 163 |
121 **Example 2** | 164 **Example 2** |
122 | 165 |