Mercurial > repos > miller-lab > genome_diversity
comparison rank_pathways.xml @ 27:8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Mon, 15 Jul 2013 10:47:35 -0400 |
parents | 95a05c1ef5d5 |
children | 184d14e4270d |
comparison
equal
deleted
inserted
replaced
26:91e835060ad2 | 27:8997f2ca8c7a |
---|---|
1 <tool id="gd_calc_freq" name="Rank Pathways" version="1.1.0"> | 1 <tool id="gd_calc_freq" name="Rank Pathways" version="1.2.0"> |
2 <description>: Assess the impact of a gene set on KEGG pathways</description> | 2 <description>: Assess the impact of a gene set on KEGG pathways</description> |
3 | 3 |
4 <command interpreter="python"> | 4 <command interpreter="python"> |
5 #if str($output_format) == 'a' | 5 #if $rank_by.choice == 'pct' |
6 rank_pathways_pct.py | 6 rank_pathways_pct.py |
7 #else if str($output_format) == 'b' | 7 --input '$rank_by.input1' |
| | 8 --columnENSEMBLT '$rank_by.t_col1' |
| | 9 --inBckgrndfile '$rank_by.input2' |
| | 10 --columnENSEMBLTBckgrnd '$rank_by.t_col2' |
| | 11 --columnKEGGBckgrnd '$rank_by.k_col2' |
| | 12 --statsTest '$rank_by.stat' |
| | 13 --output '$output' |
| | 14 #else if $rank_by.choice == 'paths' |
8 calclenchange.py | 15 calclenchange.py |
| | 16 '--loc_file=${GALAXY_DATA_INDEX_DIR}/gd.rank.loc' |
| | 17 '--species=${rank_by.input.metadata.dbkey}' |
| | 18 '--input=${rank_by.input}' |
| | 19 '--output=${output}' |
| | 20 '--posKEGGclmn=${rank_by.kpath}' |
| | 21 '--KEGGgeneposcolmn=${rank_by.kgene}' |
9 #end if | 22 #end if |
10 "--loc_file=${GALAXY_DATA_INDEX_DIR}/gd.rank.loc" | |
11 "--species=${input.metadata.dbkey}" | |
12 "--input=${input}" | |
13 "--output=${output}" | |
14 "--posKEGGclmn=${kpath}" | |
15 "--KEGGgeneposcolmn=${kgene}" | |
16 </command> | 23 </command> |
17 | 24 |
18 <inputs> | 25 <inputs> |
19 <param name="input" type="data" format="tab" label="Dataset" /> | 26 <conditional name="rank_by"> |
20 <param name="kgene" type="data_column" data_ref="input" label="Column with KEGG gene ID" /> | 27 <param name="choice" type="select" label="Rank by"> |
21 <param name="kpath" type="data_column" data_ref="input" numerical="false" label="Column with KEGG pathways" /> | 28 <option value="pct" selected="true">percentage of genes affected</option> |
22 <param name="output_format" type="select" label="Output"> | 29 <option value="paths">change in length and number of paths</option> |
23 <option value="a" selected="true">ranked by percentage of genes affected</option> | 30 </param> |
24 <option value="b">ranked by change in length and number of paths</option> | 31 <when value="pct"> |
25 </param> | 32 <!-- using fields similar to the Rank Terms tool --> |
| | 33 <param name="input1" type="data" format="tabular" label="Query dataset" /> |
| | 34 <param name="t_col1" type="data_column" data_ref="input1" label="Column with ENSEMBL transcript codes" /> |
| | 35 <param name="input2" type="data" format="tabular" label="Background dataset" /> |
| | 36 <param name="t_col2" type="data_column" data_ref="input2" label="Column with ENSEMBL transcript codes" /> |
| | 37 <param name="k_col2" type="data_column" data_ref="input2" label="Column with KEGG pathways" /> |
| | 38 <param name="stat" type="select" label="Statistic for determining enrichment/depletion"> |
| | 39 <option value="fisher" selected="true">two-tailed Fisher's exact test</option> |
| | 40 <option value="hypergeometric">hypergeometric test</option> |
| | 41 <option value="binomial">binomial probability</option> |
| | 42 </param> |
| | 43 </when> |
| | 44 <when value="paths"> |
| | 45 <param name="input" type="data" format="tabular" label="Dataset" /> |
| | 46 <param name="kgene" type="data_column" data_ref="input" label="Column with KEGG gene ID" /> |
| | 47 <param name="kpath" type="data_column" data_ref="input" numerical="false" label="Column with KEGG pathways" /> |
| | 48 </when> |
| | 49 </conditional> |
26 </inputs> | 50 </inputs> |
27 | 51 |
28 <outputs> | 52 <outputs> |
29 <data name="output" format="tabular" /> | 53 <data name="output" format="tabular" /> |
30 </outputs> | 54 </outputs> |
31 | 55 |
32 <tests> | 56 <tests> |
33 <test> | 57 <test> |
34 <param name="input" value="test_in/sample.gd_sap" ftype="gd_sap" /> | |
35 <param name="kgene" value="10" /> | |
36 <param name="kpath" value="12" /> | |
37 <param name="output_format" value="a" /> | |
38 <output name="output" file="test_out/rank_pathways/rank_pathways.tabular" /> | |
39 </test> | 58 </test> |
40 </tests> | 59 </tests> |
41 | 60 |
42 <help> | 61 <help> |
43 | 62 |
44 **Dataset formats** | 63 **Dataset formats** |
45 | 64 |
46 The input and output datasets are in tabular_ format. | 65 All of the input and output datasets are in tabular_ format. |
47 The input dataset must have columns with KEGG gene ID and pathways. | 66 The input dataset must have columns with KEGG gene ID and pathways. |
48 The output dataset is described below. | 67 [Need to update this, since input columns now depend on the "Rank by" choice.] |
| | 68 The output datasets are described below. |
49 (`Dataset missing?`_) | 69 (`Dataset missing?`_) |
50 | 70 |
51 .. _tabular: ./static/formatHelp.html#tab | 71 .. _tabular: ./static/formatHelp.html#tab |
52 .. _Dataset missing?: ./static/formatHelp.html | 72 .. _Dataset missing?: ./static/formatHelp.html |
53 | 73 |
54 ----- | 74 ----- |
55 | 75 |
56 **What it does** | 76 **What it does** |
57 | 77 |
58 This tool produces a table ranking the pathways based on the percentage | 78 This tool produces a table ranking the pathways based on the percentage |
59 of genes in an input dataset, out of the total in each pathway. | 79 of genes in an input dataset, out of the total in each pathway |
| | 80 [please clarify w.r.t. query and background datasets]. |
60 Alternatively, the tool ranks the pathways based on the change in | 81 Alternatively, the tool ranks the pathways based on the change in |
61 length and number of paths connecting sources and sinks. This change is | 82 length and number of paths connecting sources and sinks. This change is |
62 calculated between graphs representing pathways before and after excluding | 83 calculated between graphs representing pathways before and after excluding |
63 the nodes that represent the genes in an input list. Sources are all | 84 the nodes that represent the genes in an input list. Sources are all |
64 the nodes representing the initial reactants/products in the pathway. | 85 the nodes representing the initial reactants/products in the pathway. |
65 Sinks are all the nodes representing the final reactants/products in | 86 Sinks are all the nodes representing the final reactants/products in |
66 the pathway. | 87 the pathway. |
67 | 88 |
68 If pathways are ranked by percentage of genes affected, the output is | 89 If pathways are ranked by percentage of genes affected, the output contains |
69 a tabular dataset with the following columns: | 90 a row for each KEGG pathway, with the following columns: |
70 | 91 |
71 1. number of genes in the pathway present in the input dataset | 92 1. count: the number of genes in the query set that are in this pathway |
72 2. percentage of the total genes in the pathway included in the input dataset | 93 2. representation: the percentage of this pathway's genes (from the background dataset) that appear in the query set |
73 3. rank of the frequency (from high freq to low freq) | 94 3. ranking of this pathway, based on its representation ("1" is highest) |
74 4. Fisher probability of enrichment/depletion of pathway genes in the input dataset | 95 4. probability of depletion of this pathway in the query dataset |
75 5. name of the pathway | 96 5. probability of enrichment of this pathway in the query dataset |
| | 97 6. KEGG pathway |
76 | 98 |
77 If pathways are ranked by change in length and number of paths, the | 99 If pathways are ranked by change in length and number of paths, the |
78 output is a tabular dataset with the following columns: | 100 output is a tabular dataset with the following columns: |
79 | 101 |
80 1. change in the mean length of paths between sources and sinks | 102 1. change in the mean length of paths between sources and sinks |
95 | 117 |
96 Contig39_chr1_3261104_3261850 414 chr1 3261546 ENSCAFT00000000001 ENSCAFP00000000001 S 667 F 476153 probably damaging cfa00230=Purine metabolism.cfa00500=Starch and sucrose metabolism.cfa00740=Riboflavin metabolism.cfa00760=Nicotinate and nicotinamide metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways | 118 Contig39_chr1_3261104_3261850 414 chr1 3261546 ENSCAFT00000000001 ENSCAFP00000000001 S 667 F 476153 probably damaging cfa00230=Purine metabolism.cfa00500=Starch and sucrose metabolism.cfa00740=Riboflavin metabolism.cfa00760=Nicotinate and nicotinamide metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways |
97 Contig62_chr1_19011969_19012646 265 chr1 19012240 ENSCAFT00000000144 ENSCAFP00000000125 * 161 R 483960 probably damaging N | 119 Contig62_chr1_19011969_19012646 265 chr1 19012240 ENSCAFT00000000144 ENSCAFP00000000125 * 161 R 483960 probably damaging N |
98 etc. | 120 etc. |
99 | 121 |
100 - output ranked by percentage of genes affected:: | 122 - output ranked by percentage of genes affected [need new sample output with more columns]:: |
101 | 123 |
102 3 0.25 1 cfa03450=Non-homologous end-joining | 124 3 0.25 1 cfa03450=Non-homologous end-joining |
103 1 0.25 1 cfa00750=Vitamin B6 metabolism | 125 1 0.25 1 cfa00750=Vitamin B6 metabolism |
104 2 0.2 3 cfa00290=Valine, leucine and isoleucine biosynthesis | 126 2 0.2 3 cfa00290=Valine, leucine and isoleucine biosynthesis |
105 3 0.18 4 cfa00770=Pantothenate and CoA biosynthesis | 127 3 0.18 4 cfa00770=Pantothenate and CoA biosynthesis |
106 etc. | 128 etc. |
107 | 129 |
108 - output ranked by change in length and number of paths:: | 130 - output ranked by change in length and number of paths:: |
109 | 131 |
110 3.64 8.44 4.8 2 4 9 5 1 cfa00260=Glycine, serine and threonine metabolism | 132 3.64 8.44 4.8 2 4 9 5 1 cfa00260=Glycine, serine and threonine metabolism |
111 7.6 9.6 2 1 3 5 2 2 cfa00240=Pyrimidine metabolism | 133 7.6 9.6 2 1 3 5 2 2 cfa00240=Pyrimidine metabolism |
112 0.05 2.67 2.62 6 1 30 29 3 cfa00982=Drug metabolism - cytochrome P450 | 134 0.05 2.67 2.62 6 1 30 29 3 cfa00982=Drug metabolism - cytochrome P450 |
113 -0.08 8.33 8.41 84 1 30 29 3 cfa00564=Glycerophospholipid metabolism | 135 -0.08 8.33 8.41 84 1 30 29 3 cfa00564=Glycerophospholipid metabolism |
114 etc. | 136 etc. |
115 | 137 |
116 </help> | 138 </help> |
117 </tool> | 139 </tool> |