diff galaxy-conf/KMeans.xml @ 24:a77e126ae856 draft

Reupload since last upload did not load correctly
author timpalpant
date Tue, 19 Jun 2012 22:15:09 -0400
parents 9d56b5b85740
children b43c420a6135
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/galaxy-conf/KMeans.xml	Tue Jun 19 22:15:09 2012 -0400
@@ -0,0 +1,41 @@
+<tool id="KMeans" name="KMeans cluster" version="1.0.0">
+  <description>an aligned matrix</description>
+  <command interpreter="sh">galaxyToolRunner.sh visualization.KMeans -i $input -k $K -1 $min -2 $max -o $output</command>
+  <inputs>
+    <param format="tabular" name="input" type="data" label="Aligned matrix" />
+    <param name="K" type="integer" value="10" label="Number of clusters" />
+    <param name="min" type="integer" value="1" label="Minimum column to use for clustering" />
+    <param name="max" type="integer" value="-1" label="Maximum column to use for clustering (-1 to end)" />
+  </inputs>
+  <outputs>
+    <data format="tabular" name="output" metadata="input" />
+  </outputs>
+  <tests>
+  </tests>
+  
+  <help>
+    
+.. class:: warningmark
+
+This tool requires tabular data in matrix2png format (with column AND row headers). For more information about the required format and usage instructions, see the matrix2png_ website.
+
+.. _matrix2png: http://bioinformatics.ubc.ca/matrix2png/dataformat.html
+
+.. class:: infomark
+
+You can use the "Align values in a matrix" tool to create a matrix, then use this tool to cluster the matrix with k-means.
+
+.. class:: infomark
+
+**TIP:** You can use the **min** and **max** columns to cluster a large matrix based on a subset of the columns. For example, you could cluster a 4000x4000 matrix on columns 200-300 by setting min = 200 and max = 300. This will greatly increase the efficiency of distance calculations during the k-means EM, and also allows you to cluster based on specific regions, such as promoters or coding sequences.
+
+-----
+
+This tool will cluster the rows in an aligned matrix with KMeans_. The implementation builds upon the KMeansPlusPlusClusterer available in commons-math3_.
+
+.. _KMeans: http://en.wikipedia.org/wiki/K-means_clustering
+
+.. _commons-math3: http://commons.apache.org/math/
+
+  </help>
+</tool>