diff rank_pathways.xml @ 12:4b6590dd7250

Uploaded
author miller-lab
date Wed, 12 Sep 2012 17:10:26 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rank_pathways.xml	Wed Sep 12 17:10:26 2012 -0400
@@ -0,0 +1,74 @@
+<tool id="gd_calc_freq" name="Rank" version="1.0.0">
+  <description>affected KEGG pathways</description>
+  <!-- Runs calctfreq.py when output_format is 'a' and calclenchange.py when it is 'b'; both scripts receive the same six options. -->
+  <command interpreter="python">
+    #if str($output_format) == 'a'
+      calctfreq.py
+    #else if str($output_format) == 'b'
+      calclenchange.py
+    #end if
+        "--loc_file=${GALAXY_DATA_INDEX_DIR}/gd.rank.loc"
+        "--species=${input.metadata.dbkey}"
+        "--input=${input}"
+        "--output=${output}"
+        "--posKEGGclmn=${input.metadata.kegg_path}"
+        "--KEGGgeneposcolmn=${input.metadata.kegg_gene}"
+  </command>
+  <!-- Parameters: a gd_sap table validated for kegg_gene and kegg_path metadata, plus the output_format selector ('a' is preselected). -->
+  <inputs>
+    <param name="input" type="data" format="gd_sap" label="Table">
+        <validator type="metadata" check="kegg_gene,kegg_path" message="Missing KEGG gene code column and/or KEGG pathway code/name column metadata.  Click the pencil icon in the history item to edit/save the metadata attributes" />
+    </param>
+    <param name="output_format" type="select" label="Output format">
+      <option value="a" selected="true">ranked by percentage of genes affected</option>
+      <option value="b">ranked by change in length and number of paths</option>
+    </param>
+  </inputs>
+  <!-- Single tabular result dataset; its path is handed to the selected script through the output option of the command. -->
+  <outputs>
+    <data name="output" format="tabular" />
+  </outputs>
+  <!-- Functional test for output format 'a' only: input test_in/sample.gd_sap, expected output file under test_out/rank_pathways; format 'b' has no test in this file. -->
+  <tests>
+    <test>
+      <param name="input" value="test_in/sample.gd_sap" ftype="gd_sap" />
+      <param name="output_format" value="a" />
+      <output name="output" file="test_out/rank_pathways/rank_pathways.tabular" />
+    </test>
+  </tests>
+
+  <help>
+**What it does**
+
+This tool produces a table ranking the pathways based on the percentage
+of each pathway's genes that are present in the input dataset.
+Alternatively, the tool ranks the pathways based on the change in
+length and number of paths connecting sources and sinks.  This change is
+calculated between graphs representing each pathway with and without
+the nodes that represent the genes in an input list.  Sources are all
+the nodes representing the initial reactants/products in the pathway.
+Sinks are all the nodes representing the final reactants/products in
+the pathway.
+
+If pathways are ranked by percentage of genes affected, the output is
+a tabular dataset with the following columns:
+
+   1. number of genes in the pathway present in the input dataset
+   2. percentage of the total genes in the pathway included in the input dataset
+   3. rank of the frequency (from high freq to low freq)
+   4. name of the pathway
+
+If pathways are ranked by change in length and number of paths, the
+output is a tabular dataset with the following columns:
+
+   1. change in the mean length of paths between sources and sinks
+   2. mean length of paths between sources and sinks in the pathway including the genes in the input dataset.  If the pathway does not have sources/sinks, the length is assumed to be infinite (I)
+   3. mean length of paths between sources and sinks in the pathway excluding the genes in the input dataset.  If the pathway does not have sources/sinks, the length is assumed to be infinite (I)
+   4. rank of the change in the mean length of paths between sources and sinks (from high change to low change)
+   5. change in the number of paths between sources and sinks
+   6. number of paths between sources and sinks in the pathway including the genes in the input dataset.  If the pathway does not have sources/sinks, it is assumed to be a circuit (C)
+   7. number of paths between sources and sinks in the pathway excluding the genes in the input dataset.  If the pathway does not have sources/sinks, it is assumed to be a circuit (C)
+   8. rank of the change in the number of paths between sources and sinks (from high change to low change)
+   9. name of the pathway
+  </help>
+</tool>