Mercurial > repos > miller-lab > genome_diversity
annotate rank_terms.xml @ 32:03c22b722882
remove BeautifulSoup dependency
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Fri, 20 Sep 2013 13:54:23 -0400 |
parents | a631c2f6d913 |
children |
rev | line source |
---|---|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
1 <tool id="gd_rank_terms" name="Rank Terms" version="1.1.0"> |
22
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
2 <description>: Assess the enrichment/depletion of a gene set for GO terms</description> |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
3 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
4 <command interpreter="python"> |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
5 #set $t_col1_0 = int(str($t_col1)) - 1 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
6 #set $t_col2_0 = int(str($t_col2)) - 1 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
7 #set $g_col2_0 = int(str($g_col2)) - 1 |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
8 rank_terms.py --input "$input1" --columnENSEMBLT $t_col1_0 --inExtnddfile "$input2" --columnENSEMBLTExtndd $t_col2_0 --columnGOExtndd $g_col2_0 --statsTest "$stat" --output "$output" |
22
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
9 </command> |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
10 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
11 <inputs> |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
12 <param name="input1" type="data" format="tabular" label="Query dataset" /> |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
13 <param name="t_col1" type="data_column" data_ref="input1" label="Column with ENSEMBL transcript codes" /> |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
14 <param name="input2" type="data" format="tabular" label="Background dataset" /> |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
15 <param name="t_col2" type="data_column" data_ref="input2" label="Column with ENSEMBL transcript codes" /> |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
16 <param name="g_col2" type="data_column" data_ref="input2" label="Column with GO terms" /> |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
17 <param name="stat" type="select" label="Statistic for determining enrichment/depletion"> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
18 <option value="fisher" selected="true">two-tailed Fisher's exact test</option> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
19 <option value="hypergeometric">hypergeometric test</option> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
20 <option value="binomial">binomial probability</option> |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
21 </param> |
22
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
22 </inputs> |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
23 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
24 <outputs> |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
25 <data name="output" format="tabular" /> |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
26 </outputs> |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
27 |
31
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
28 <requirements> |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
29 <requirement type="package" version="0.1.4">fisher</requirement> |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
30 </requirements> |
a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
Richard Burhans <burhans@bx.psu.edu>
parents:
27
diff
changeset
|
31 |
22
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
32 <help> |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
33 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
34 **Dataset formats** |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
35 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
36 All of the input and output datasets are in tabular_ format. |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
37 The query dataset has a column containing ENSEMBL transcript codes for |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
38 the gene set of interest, while the background dataset has one column |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
39 with ENSEMBL transcript codes and another with GO terms, for some |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
40 larger universe of genes. |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
41 The output dataset is described below. |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
42 (`Dataset missing?`_) |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
43 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
44 .. _tabular: ./static/formatHelp.html#tab |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
45 .. _Dataset missing?: ./static/formatHelp.html |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
46 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
47 ----- |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
48 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
49 **What it does** |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
50 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
51 Given a query set of genes from a larger background dataset, this tool |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
52 evaluates the over- or under-representation of Gene Ontology terms in the |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
53 query set, using the specified statistical test. |
22
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
54 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
55 The output contains a row for each GO term, with the following columns: |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
56 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
57 1. count: the number of genes in the query set that are in this GO category |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
58 2. representation: the percentage of this category's genes (from the background dataset) that appear in the query set |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
59 3. ranking of this term, based on its representation ("1" is highest) |
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
60 4. probability of depletion of this GO category in the query dataset |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
61 5. probability of enrichment of this GO category in the query dataset |
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
22
diff
changeset
|
62 6. GO term |
22
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
63 |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
64 </help> |
95a05c1ef5d5
update to devshed revision aaece207bd01
Richard Burhans <burhans@bx.psu.edu>
parents:
diff
changeset
|
65 </tool> |