sharplabtool: tools/stats/grouping.xml comparison

comparison tools/stats/grouping.xml @ 0:9071e359b9a3

Uploaded

author	xuebing
date	Fri, 09 Mar 2012 19:37:19 -0500
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:9071e359b9a3
+<tool id="Grouping1" name="Group" version="2.0.0">
+<!-- One-line summary of the tool. NOTE(review): presumably shown directly after the tool name "Group", which would explain why it reads as a sentence continuation; confirm. -->
+<description>data by a column and perform aggregate operation on other columns.</description>
+<!--
+  Command template, run with the Python interpreter. grouping.py receives, in
+  order: the output dataset, the input dataset, the group-by column and the
+  ignore-case flag (the "ignorecase" parameter declares 1 / 0 as its
+  truevalue / falsevalue). The #for loop then appends one single-quoted
+  argument per entry of the "operations" repeat, made of that entry's optype,
+  opcol and opround values.
+  NOTE(review): each quoted argument spans three template lines; presumably
+  the line breaks are collapsed to spaces before execution. Confirm against
+  grouping.py's argument parsing before reformatting this element.
+-->
+<command interpreter="python">
+grouping.py
+$out_file1
+$input1
+$groupcol
+$ignorecase
+#for $op in $operations
+'${op.optype}
+${op.opcol}
+${op.opround}'
+#end for
+</command>
+<inputs>
+  <!-- Tabular dataset to be grouped. -->
+  <param name="input1" type="data" format="tabular" label="Select data" help="Dataset missing? See TIP below."/>
+  <!-- Column of input1 whose values define the groups. -->
+  <param name="groupcol" type="data_column" data_ref="input1" label="Group by column"/>
+  <!-- Boolean flag; its true/false values are declared as 1 and 0. -->
+  <param name="ignorecase" type="boolean" truevalue="1" falsevalue="0">
+    <label>Ignore case while grouping?</label>
+  </param>
+  <!-- Repeatable section: each entry describes one aggregate operation. -->
+  <repeat name="operations" title="Operation">
+    <!-- Aggregate function to apply. -->
+    <param name="optype" type="select" label="Type">
+      <option value="mean">Mean</option>
+      <option value="median">Median</option>
+      <option value="mode">Mode</option>
+      <option value="max">Maximum</option>
+      <option value="min">Minimum</option>
+      <option value="sum">Sum</option>
+      <option value="length">Count</option>
+      <option value="unique">Count Distinct</option>
+      <option value="cat">Concatenate</option>
+      <option value="cat_uniq">Concatenate Distinct</option>
+      <option value="random">Randomly pick</option>
+      <option value="std">Standard deviation</option>
+    </param>
+    <!-- Column of input1 the function is applied to. -->
+    <param name="opcol" type="data_column" data_ref="input1" label="On column"/>
+    <!-- Whether the result is rounded; "no" is listed first. -->
+    <param name="opround" type="select" label="Round result to nearest integer?">
+      <option value="no">NO</option>
+      <option value="yes">YES</option>
+    </param>
+  </repeat>
+</inputs>
+<outputs>
+  <!-- Tabular result; referenced in the command template as $out_file1. -->
+  <data name="out_file1" format="tabular"/>
+</outputs>
+<requirements>
+  <!-- Python module the tool declares as a dependency. -->
+  <requirement type="python-module">numpy</requirement>
+</requirements>
+<tests>
+  <!-- Valid input: mean of column 2, grouped on column 1, with ignorecase set to true. -->
+  <test>
+    <param name="input1" value="1.bed"/>
+    <param name="groupcol" value="1"/>
+    <param name="ignorecase" value="true"/>
+    <param name="optype" value="mean"/>
+    <param name="opcol" value="2"/>
+    <param name="opround" value="no"/>
+    <output name="out_file1" file="groupby_out1.dat"/>
+  </test>
+  <!-- Disabled: long case covering all twelve operation types; the test framework doesn't allow it yet
+  <test>
+    <param name="input1" value="1.bed"/>
+    <param name="groupcol" value="1"/>
+    <param name="ignorecase" value="false"/>
+    <param name="operations" value='[{"opcol": "2", "__index__": 0, "optype": "mean", "opround": "no"}, {"opcol": "2", "__index__": 1, "optype": "median", "opround": "no"}, {"opcol": "6", "__index__": 2, "optype": "mode", "opround": "no"}, {"opcol": "2", "__index__": 3, "optype": "max", "opround": "no"}, {"opcol": "2", "__index__": 4, "optype": "min", "opround": "no"}, {"opcol": "2", "__index__": 5, "optype": "sum", "opround": "no"}, {"opcol": "1", "__index__": 6, "optype": "length", "opround": "no"}, {"opcol": "1", "__index__": 7, "optype": "unique", "opround": "no"}, {"opcol": "1", "__index__": 8, "optype": "cat", "opround": "no"}, {"opcol": "6", "__index__": 9, "optype": "cat_uniq", "opround": "no"}, {"opcol": "2", "__index__": 10, "optype": "random", "opround": "no"}, {"opcol": "2", "__index__": 11, "optype": "std", "opround": "no"}]'/>
+    <output name="out_file1" file="groupby_out3.tabular"/>
+  </test>
+  -->
+  <!-- Disabled: input with an invalid value in a column; the test framework doesn't allow testing of errors
+  <test>
+    <param name="input1" value="1.tabular"/>
+    <param name="groupcol" value="1"/>
+    <param name="ignorecase" value="true"/>
+    <param name="optype" value="mean"/>
+    <param name="opcol" value="2"/>
+    <param name="opround" value="no"/>
+    <output name="out_file1" file="groupby_out2.dat"/>
+  </test>
+  -->
+</tests>
+<!--
+  User help, written in reStructuredText. Whitespace inside <help> is
+  significant: the directive, the transitions, each paragraph and the
+  literal blocks introduced by "::" need the surrounding blank lines, and
+  literal-block rows must stay indented. Operation names below match the
+  labels of the "optype" select options.
+-->
+<help>
+
+.. class:: infomark
+
+**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*
+
+-----
+
+**Syntax**
+
+This tool allows you to group the input dataset by a particular column and perform aggregate functions: Mean, Median, Mode, Maximum, Minimum, Sum, Count, Count Distinct, Concatenate, Concatenate Distinct, Randomly pick, and Standard deviation on any column(s).
+
+The Concatenate function will take, for each group, each item in the specified column and build a comma delimited list. Concatenate Distinct will do the same but will build a list of unique items with no repetition.
+
+Count and Count Distinct are equivalent to Concatenate and Concatenate Distinct, but will only count the number of items and will return an integer.
+
+- If multiple modes are present, all are reported.
+
+-----
+
+**Example**
+
+- For the following input::
+
+   chr22  1000  1003  TTT
+   chr22  2000  2003  aaa
+   chr10  2200  2203  TTT
+   chr10  1200  1203  ttt
+   chr22  1600  1603  AAA
+
+- **Grouping on column 4** while ignoring case, and performing operation **Count on column 1** will return::
+
+   AAA    2
+   TTT    3
+
+- **Grouping on column 4** while not ignoring case, and performing operation **Count on column 1** will return::
+
+   aaa    1
+   AAA    1
+   ttt    1
+   TTT    2
+
+</help>
+</tool>

Mercurial > repos > xuebing > sharplabtool

comparison tools/stats/grouping.xml @ 0:9071e359b9a3