Mercurial > repos > miller-lab > genome_diversity

diff rank_terms.xml @ 22:95a05c1ef5d5
update to devshed revision aaece207bd01
author: Richard Burhans <burhans@bx.psu.edu>
date: Mon, 11 Mar 2013 11:28:06 -0400
children: 8997f2ca8c7a
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rank_terms.xml	Mon Mar 11 11:28:06 2013 -0400
@@ -0,0 +1,56 @@
+<tool id="gd_rank_terms" name="Rank Terms" version="1.0.0">
+  <description>: Assess the enrichment/depletion of a gene set for GO terms</description>
+
+  <command interpreter="python">
+    #set $t_col1_0 = int(str($t_col1)) - 1
+    #set $t_col2_0 = int(str($t_col2)) - 1
+    #set $g_col2_0 = int(str($g_col2)) - 1
+    rank_terms.py --input "$input1" --columnENSEMBLT $t_col1_0 --inExtnddfile "$input2" --columnENSEMBLTExtndd $t_col2_0 --columnGOExtndd $g_col2_0 --output "$output"
+  </command>
+
+  <inputs>
+    <param name="input1" type="data" format="tabular" label="Query dataset" />
+    <param name="t_col1" type="data_column" data_ref="input1" label="Column with ENSEMBL transcript codes" />
+
+    <param name="input2" type="data" format="tabular" label="Background dataset" />
+    <param name="t_col2" type="data_column" data_ref="input2" label="Column with ENSEMBL transcript codes" />
+    <param name="g_col2" type="data_column" data_ref="input2" label="Column with GO terms" />
+  </inputs>
+
+  <outputs>
+    <data name="output" format="tabular" />
+  </outputs>
+
+  <help>
+
+**Dataset formats**
+
+All of the input and output datasets are in tabular_ format.
+The query dataset has a column containing ENSEMBL transcript codes for
+the gene set of interest, while the background dataset has one column
+with ENSEMBL transcript codes and another with GO terms, for some
+larger universe of genes.
+The output dataset is described below.
+(`Dataset missing?`_)
+
+.. _tabular: ./static/formatHelp.html#tab
+.. _Dataset missing?: ./static/formatHelp.html
+
+-----
+
+**What it does**
+
+Given a query set of genes from a larger background dataset, this tool
+evaluates the statistical over- or under-representation of Gene Ontology
+terms in the query set, using a two-tailed Fisher's exact test.
+
+The output contains a row for each GO term, with the following columns:
+
+1. count: the number of genes in the query set that are in this GO category
+2. representation: the percentage of this category's genes (from the background dataset) that appear in the query set
+3. ranking of this term, based on its representation ("1" is highest)
+4. Fisher probability of enrichment/depletion of this GO category in the query dataset
+5. GO term
+
+  </help>
+</tool>
author	Richard Burhans <burhans@bx.psu.edu>
date	Mon, 11 Mar 2013 11:28:06 -0400
parents
children	8997f2ca8c7a