annotate rank_pathways.xml @ 21:d6b961721037

Miller Lab Devshed version 4c04e35b18f6
author Richard Burhans <burhans@bx.psu.edu>
date Mon, 05 Nov 2012 12:44:17 -0500
parents 8ae67e9fb6ff
children 95a05c1ef5d5
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
14
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
1 <tool id="gd_calc_freq" name="Rank Pathways" version="1.0.0">
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
2 <description>: Assess the impact of gene sets on pathways</description>
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
3
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
4 <command interpreter="python">
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
5 #if str($output_format) == 'a'
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
6 calctfreq.py
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
7 #else if str($output_format) == 'b'
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
8 calclenchange.py
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
9 #end if
21
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
10 "--loc_file=${GALAXY_DATA_INDEX_DIR}/gd.rank.loc"
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
11 "--species=${input.metadata.dbkey}"
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
12 "--input=${input}"
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
13 "--output=${output}"
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
14 "--posKEGGclmn=${kpath}"
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
15 "--KEGGgeneposcolmn=${kgene}"
14
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
16 </command>
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
17
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
18 <inputs>
21
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
19 <param name="input" type="data" format="tabular" label="Dataset" />
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
20 <param name="kgene" type="data_column" data_ref="input" label="Column with KEGG gene ID" />
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
21 <param name="kpath" type="data_column" data_ref="input" numerical="false" label="Column with KEGG pathways" />
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
22 <param name="output_format" type="select" label="Output">
14
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
23 <option value="a" selected="true">ranked by percentage of genes affected</option>
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
24 <option value="b">ranked by change in length and number of paths</option>
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
25 </param>
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
26 </inputs>
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
27
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
28 <outputs>
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
29 <data name="output" format="tabular" />
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
30 </outputs>
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
31
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
32 <tests>
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
33 <test>
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
34 <param name="input" value="test_in/sample.gd_sap" ftype="gd_sap" />
21
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
35 <param name="kgene" value="10" />
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
36 <param name="kpath" value="12" />
14
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
37 <param name="output_format" value="a" />
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
38 <output name="output" file="test_out/rank_pathways/rank_pathways.tabular" />
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
39 </test>
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
40 </tests>
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
41
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
42 <help>
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
43
21
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
44 **Dataset formats**
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
45
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
46 The input and output datasets are in tabular_ format.
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
47 The input dataset must have columns with KEGG gene ID and pathways.
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
48 The output dataset is described below.
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
49 (`Dataset missing?`_)
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
50
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
51 .. _tabular: ./static/formatHelp.html#tab
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
52 .. _Dataset missing?: ./static/formatHelp.html
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
53
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
54 -----
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
55
14
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
56 **What it does**
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
57
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
58 This tool produces a table ranking the pathways based on the percentage
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
59 of genes in an input dataset, out of the total in each pathway.
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
60 Alternatively, the tool ranks the pathways based on the change in
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
61 length and number of paths connecting sources and sinks. This change is
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
62 calculated between graphs representing pathways with and without excluding
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
63 the nodes that represent the genes in an input list. Sources are all
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
64 the nodes representing the initial reactants/products in the pathway.
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
65 Sinks are all the nodes representing the final reactants/products in
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
66 the pathway.
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
67
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
68 If pathways are ranked by percentage of genes affected, the output is
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
69 a tabular dataset with the following columns:
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
70
21
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
71 1. number of genes in the pathway present in the input dataset
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
72 2. percentage of the total genes in the pathway included in the input dataset
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
73 3. rank of the frequency (from high freq to low freq)
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
74 4. name of the pathway
14
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
75
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
76 If pathways are ranked by change in length and number of paths, the
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
77 output is a tabular dataset with the following columns:
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
78
21
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
79 1. change in the mean length of paths between sources and sinks
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
80 2. mean length of paths between sources and sinks in the pathway including the genes in the input dataset. If the pathway does not have sources/sinks, the length is assumed to be infinite (I)
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
81 3. mean length of paths between sources and sinks in the pathway excluding the genes in the input dataset. If the pathway does not have sources/sinks, the length is assumed to be infinite (I)
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
82 4. rank of the change in the mean length of paths between sources and sinks (from high change to low change)
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
83 5. change in the number of paths between sources and sinks
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
84 6. number of paths between sources and sinks in the pathway including the genes in the input dataset. If the pathway does not have sources/sinks, it is assumed to be a circuit (C)
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
85 7. number of paths between sources and sinks in the pathway excluding the genes in the input dataset. If the pathway does not have sources/sinks, it is assumed to be a circuit (C)
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
86 8. rank of the change in the number of paths between sources and sinks (from high change to low change)
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
87 9. name of the pathway
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
88
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
89 -----
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
90
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
91 **Examples**
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
92
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
93 - input (column 10 for KEGG gene ID, column 12 for KEGG pathways)::
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
94
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
95 Contig39_chr1_3261104_3261850 414 chr1 3261546 ENSCAFT00000000001 ENSCAFP00000000001 S 667 F 476153 probably damaging cfa00230=Purine metabolism.cfa00500=Starch and sucrose metabolism.cfa00740=Riboflavin metabolism.cfa00760=Nicotinate and nicotinamide metabolism.cfa00770=Pantothenate and CoA biosynthesis.cfa01100=Metabolic pathways
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
96 Contig62_chr1_19011969_19012646 265 chr1 19012240 ENSCAFT00000000144 ENSCAFP00000000125 * 161 R 483960 probably damaging N
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
97 etc.
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
98
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
99 - output ranked by percentage of genes affected::
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
100
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
101 3 0.25 1 cfa03450=Non-homologous end-joining
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
102 1 0.25 1 cfa00750=Vitamin B6 metabolism
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
103 2 0.2 3 cfa00290=Valine, leucine and isoleucine biosynthesis
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
104 3 0.18 4 cfa00770=Pantothenate and CoA biosynthesis
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
105 etc.
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
106
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
107 - output ranked by change in length and number of paths::
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
108
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
109 3.64 8.44 4.8 2 4 9 5 1 cfa00260=Glycine, serine and threonine metabolism
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
110 7.6 9.6 2 1 3 5 2 2 cfa00240=Pyrimidine metabolism
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
111 0.05 2.67 2.62 6 1 30 29 3 cfa00982=Drug metabolism - cytochrome P450
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
112 -0.08 8.33 8.41 84 1 30 29 3 cfa00564=Glycerophospholipid metabolism
d6b961721037 Miller Lab Devshed version 4c04e35b18f6
Richard Burhans <burhans@bx.psu.edu>
parents: 14
diff changeset
113 etc.
14
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
114
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
115 </help>
8ae67e9fb6ff Uploaded Miller Lab Devshed version a51c894f5bed again [possible toolshed.g2 bug]
miller-lab
parents:
diff changeset
116 </tool>