<tool id="Grouping1" name="Group" version="2.0.0">
  <!--
    Groups a tabular dataset by one column and applies one or more aggregate
    operations to other columns. The work itself is done by grouping.py.
  -->
  <description>data by a column and perform aggregate operation on other columns.</description>
  <!--
    Positional arguments handed to grouping.py, in order: output file, input
    file, group by column, ignore case flag (1 or 0), then one single-quoted
    "optype opcol opround" triple for each entry of the "operations" repeat.
  -->
  <command interpreter="python">
    grouping.py
      $out_file1
      $input1
      $groupcol
      $ignorecase
      #for $op in $operations
       '${op.optype}
        ${op.opcol}
        ${op.opround}'
      #end for
  </command>
  <inputs>
    <param format="tabular" name="input1" type="data" label="Select data" help="Dataset missing? See TIP below."/>
    <param name="groupcol" label="Group by column" type="data_column" data_ref="input1"/>
    <!-- Passed on the command line as 1 (checked) or 0 (unchecked). -->
    <param name="ignorecase" type="boolean" truevalue="1" falsevalue="0" label="Ignore case while grouping?"/>
    <!-- Each repeat entry contributes one quoted argument to the command line. -->
    <repeat name="operations" title="Operation">
      <param name="optype" type="select" label="Type">
        <option value="mean">Mean</option>
        <option value="median">Median</option>
        <option value="mode">Mode</option>
        <option value="max">Maximum</option>
        <option value="min">Minimum</option>
        <option value="sum">Sum</option>
        <option value="length">Count</option>
        <option value="unique">Count Distinct</option>
        <option value="cat">Concatenate</option>
        <option value="cat_uniq">Concatenate Distinct</option>
        <option value="random">Randomly pick</option>
        <option value="std">Standard deviation</option>
      </param>
      <param name="opcol" label="On column" type="data_column" data_ref="input1"/>
      <param name="opround" type="select" label="Round result to nearest integer?">
        <option value="no">NO</option>
        <option value="yes">YES</option>
      </param>
    </repeat>
  </inputs>
  <outputs>
    <data format="tabular" name="out_file1"/>
  </outputs>
  <requirements>
    <requirement type="python-module">numpy</requirement>
  </requirements>
  <tests>
    <!-- Test valid data -->
    <test>
      <param name="input1" value="1.bed"/>
      <param name="groupcol" value="1"/>
      <param name="ignorecase" value="true"/>
      <param name="optype" value="mean"/>
      <param name="opcol" value="2"/>
      <param name="opround" value="no"/>
      <output name="out_file1" file="groupby_out1.dat"/>
    </test>
    <!-- Long case but test framework doesn't allow yet
    <test>
      <param name="input1" value="1.bed"/>
      <param name="groupcol" value="1"/>
      <param name="ignorecase" value="false"/>
      <param name="operations" value='[{"opcol": "2", "__index__": 0, "optype": "mean", "opround": "no"}, {"opcol": "2", "__index__": 1, "optype": "median", "opround": "no"}, {"opcol": "6", "__index__": 2, "optype": "mode", "opround": "no"}, {"opcol": "2", "__index__": 3, "optype": "max", "opround": "no"}, {"opcol": "2", "__index__": 4, "optype": "min", "opround": "no"}, {"opcol": "2", "__index__": 5, "optype": "sum", "opround": "no"}, {"opcol": "1", "__index__": 6, "optype": "length", "opround": "no"}, {"opcol": "1", "__index__": 7, "optype": "unique", "opround": "no"}, {"opcol": "1", "__index__": 8, "optype": "cat", "opround": "no"}, {"opcol": "6", "__index__": 9, "optype": "cat_uniq", "opround": "no"}, {"opcol": "2", "__index__": 10, "optype": "random", "opround": "no"}, {"opcol": "2", "__index__": 11, "optype": "std", "opround": "no"}]'/>
      <output name="out_file1" file="groupby_out3.tabular"/>
    </test>
    -->
    <!-- Test data with an invalid value in a column. Can't do it because test framework doesn't allow testing of errors
    <test>
      <param name="input1" value="1.tabular"/>
      <param name="groupcol" value="1"/>
      <param name="ignorecase" value="true"/>
      <param name="optype" value="mean"/>
      <param name="opcol" value="2"/>
      <param name="opround" value="no"/>
      <output name="out_file1" file="groupby_out2.dat"/>
    </test>
    -->
  </tests>
  <help>

.. class:: infomark

**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*

-----

**Syntax**

This tool allows you to group the input dataset by a particular column and perform aggregate functions: Mean, Median, Mode, Maximum, Minimum, Sum, Count, Count Distinct, Concatenate, Concatenate Distinct, Randomly pick, and Standard deviation on any column(s).

The Concatenate function will take, for each group, each item in the specified column and build a comma delimited list. Concatenate Distinct will do the same but will build a list of unique items with no repetition.

Count and Count Distinct are equivalent to Concatenate and Concatenate Distinct, but will only count the number of items and will return an integer.

- If multiple modes are present, all are reported.

-----

**Example**

- For the following input::

   chr22  1000  1003  TTT
   chr22  2000  2003  aaa
   chr10  2200  2203  TTT
   chr10  1200  1203  ttt
   chr22  1600  1603  AAA

- **Grouping on column 4** while ignoring case, and performing operation **Count on column 1** will return::

   AAA    2
   TTT    3

- **Grouping on column 4** while not ignoring case, and performing operation **Count on column 1** will return::

   aaa    1
   AAA    1
   ttt    1
   TTT    2
  </help>
</tool>