Mercurial > repos > xuebing > sharplabtool
comparison tools/stats/grouping.xml @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 <tool id="Grouping1" name="Group" version="2.0.0"> | |
2 <description>data by a column and perform aggregate operation on other columns.</description> | |
<!-- Command line: runs grouping.py with the python interpreter. Positional arguments, in order: the output file, the input dataset, the group-by column and the ignore-case flag. After those comes one single-quoted argument per entry of the "operations" repeat, holding that entry's optype, opcol and opround values. -->
3 <command interpreter="python"> | |
4 grouping.py | |
5 $out_file1 | |
6 $input1 | |
7 $groupcol | |
8 $ignorecase | |
9 #for $op in $operations | |
10 '${op.optype} | |
11 ${op.opcol} | |
12 ${op.opround}' | |
13 #end for | |
14 </command> | |
<!-- Inputs: a tabular dataset (input1), the column to group by, an ignore-case switch that yields 1 when true and 0 when false, and a repeatable "Operation" section. Both column selectors are bound to input1 through data_ref. -->
15 <inputs> | |
16 <param format="tabular" name="input1" type="data" label="Select data" help="Dataset missing? See TIP below."/> | |
17 <param name="groupcol" label="Group by column" type="data_column" data_ref="input1" /> | |
18 <param name="ignorecase" type="boolean" truevalue="1" falsevalue="0"> | |
19 <label>Ignore case while grouping?</label> | |
20 </param> | |
<!-- Each repetition describes one aggregate: the operation type, the column it applies to, and whether to round the result. The option "value" attributes (for example length, unique, cat_uniq) are the identifiers substituted into the command line; the element text is the label shown to the user. -->
21 <repeat name="operations" title="Operation"> | |
22 <param name="optype" type="select" label="Type"> | |
23 <option value="mean">Mean</option> | |
24 <option value="median">Median</option> | |
25 <option value="mode">Mode</option> | |
26 <option value="max">Maximum</option> | |
27 <option value="min">Minimum</option> | |
28 <option value="sum">Sum</option> | |
29 <option value="length">Count</option> | |
30 <option value="unique">Count Distinct</option> | |
31 <option value="cat">Concatenate</option> | |
32 <option value="cat_uniq">Concatenate Distinct</option> | |
33 <option value="random">Randomly pick</option> | |
34 <option value="std">Standard deviation</option> | |
35 </param> | |
36 <param name="opcol" label="On column" type="data_column" data_ref="input1" /> | |
37 <param name="opround" type="select" label="Round result to nearest integer?"> | |
38 <option value="no">NO</option> | |
39 <option value="yes">YES</option> | |
40 </param> | |
41 </repeat> | |
42 </inputs> | |
<!-- Outputs: a single tabular dataset, referenced as $out_file1 in the command. -->
43 <outputs> | |
44 <data format="tabular" name="out_file1" /> | |
45 </outputs> | |
<!-- Requirements: declares the numpy Python module as a dependency of this tool. -->
46 <requirements> | |
47 <requirement type="python-module">numpy</requirement> | |
48 </requirements> | |
<!-- Functional tests. Only the first test below is active; the other two are kept commented out for the reasons given in their own comments. -->
49 <tests> | |
50 <!-- Test valid data: mean of column 2 of 1.bed, grouped by column 1, ignoring case, without rounding --> | |
51 <test> | |
52 <param name="input1" value="1.bed"/> | |
53 <param name="groupcol" value="1"/> | |
54 <param name="ignorecase" value="true"/> | |
55 <param name="optype" value="mean"/> | |
56 <param name="opcol" value="2"/> | |
57 <param name="opround" value="no"/> | |
58 <output name="out_file1" file="groupby_out1.dat"/> | |
59 </test> | |
60 <!-- Long case exercising all twelve operation types in a single run; disabled because the test framework doesn't allow it yet | |
61 <test> | |
62 <param name="input1" value="1.bed"/> | |
63 <param name="groupcol" value="1"/> | |
64 <param name="ignorecase" value="false"/> | |
65 <param name="operations" value='[{"opcol": "2", "__index__": 0, "optype": "mean", "opround": "no"}, {"opcol": "2", "__index__": 1, "optype": "median", "opround": "no"}, {"opcol": "6", "__index__": 2, "optype": "mode", "opround": "no"}, {"opcol": "2", "__index__": 3, "optype": "max", "opround": "no"}, {"opcol": "2", "__index__": 4, "optype": "min", "opround": "no"}, {"opcol": "2", "__index__": 5, "optype": "sum", "opround": "no"}, {"opcol": "1", "__index__": 6, "optype": "length", "opround": "no"}, {"opcol": "1", "__index__": 7, "optype": "unique", "opround": "no"}, {"opcol": "1", "__index__": 8, "optype": "cat", "opround": "no"}, {"opcol": "6", "__index__": 9, "optype": "cat_uniq", "opround": "no"}, {"opcol": "2", "__index__": 10, "optype": "random", "opround": "no"}, {"opcol": "2", "__index__": 11, "optype": "std", "opround": "no"}]'/> | |
66 <output name="out_file1" file="groupby_out3.tabular"/> | |
67 </test> | |
68 --> | |
69 <!-- Test data with an invalid value in a column. Disabled because the test framework doesn't allow testing of errors. | |
70 <test> | |
71 <param name="input1" value="1.tabular"/> | |
72 <param name="groupcol" value="1"/> | |
73 <param name="ignorecase" value="true"/> | |
74 <param name="optype" value="mean"/> | |
75 <param name="opcol" value="2"/> | |
76 <param name="opround" value="no"/> | |
77 <output name="out_file1" file="groupby_out2.dat"/> | |
78 </test> | |
79 --> | |
80 </tests> | |
81 <help> | |
82 | |
83 .. class:: infomark | |
84 | |
85 **TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert* | |
86 | |
87 ----- | |
88 | |
89 **Syntax** | |
90 | |
91 This tool allows you to group the input dataset by a particular column and perform aggregate functions on any column(s): Mean, Median, Mode, Maximum, Minimum, Sum, Count, Count Distinct, Concatenate, Concatenate Distinct, Randomly pick, and Standard deviation. | |
92 | |
93 The Concatenate function will take, for each group, each item in the specified column and build a comma-delimited list. Concatenate Distinct will do the same but will build a list of unique items with no repetition. | |
94 | |
95 Count and Count Distinct are the counterparts of Concatenate and Concatenate Distinct: instead of building the list, they count its items and return an integer. | |
96 | |
97 - If multiple modes are present, all are reported. | |
98 | |
99 ----- | |
100 | |
101 **Example** | |
102 | |
103 - For the following input:: | |
104 | |
105 chr22 1000 1003 TTT | |
106 chr22 2000 2003 aaa | |
107 chr10 2200 2203 TTT | |
108 chr10 1200 1203 ttt | |
109 chr22 1600 1603 AAA | |
110 | |
111 - **Grouping on column 4** while ignoring case, and performing operation **Count on column 1** will return:: | |
112 | |
113 AAA 2 | |
114 TTT 3 | |
115 | |
116 - **Grouping on column 4** while not ignoring case, and performing operation **Count on column 1** will return:: | |
117 | |
118 aaa 1 | |
119 AAA 1 | |
120 ttt 1 | |
121 TTT 2 | |
122 </help> | |
123 </tool> |