w4mkmeans: w4mkmeans.xml comparison

comparison w4mkmeans.xml @ 0:6ccbe18131a6 draft

planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit 299e5c7fdb0d6eb0773f3660009f6d63c2082a8d

author	eschen42
date	Tue, 08 Aug 2017 15:30:38 -0400
parents
children	02cafb660b72

comparison

equal deleted inserted replaced

--1:000000000000
+:6ccbe18131a6
+<tool id="w4mkmeans" name="Kmeans_for_W4M" version="0.98.1">
+<description>Calculate K-means for dataMatrix features or samples</description>
+<!-- Packages this tool declares as dependencies: r-base and r-batch. -->
+<requirements>
+  <requirement version="3.3.2" type="package">r-base</requirement>
+  <requirement version="1.1_4" type="package">r-batch</requirement>
+</requirements>
+<!-- Exit codes of 1 or greater are classified as fatal. -->
+<stdio>
+  <exit_code level="fatal" range="1:"/>
+</stdio>
+<!--
+  Runs w4mkmeans_wrapper.R from the tool directory, passing every setting as a
+  name/value argument pair.
+  The Rscript exit status is saved in "rc", echoed to stdout as before, and then
+  re-raised with "exit" so that a failing R run is not masked by the (always
+  zero) exit status of "echo".
+  Tool-directory paths are single-quoted so that paths containing spaces work.
+  Shell dollar signs are backslash-escaped so the template engine leaves them
+  for the shell, as is already done for GALAXY_SLOTS.
+-->
+<command detect_errors="aggressive"><![CDATA[
+Rscript '$__tool_directory__/w4mkmeans_wrapper.R'
+tool_directory '$__tool_directory__'
+data_matrix_path '$dataMatrix_in'
+variable_metadata_path '$variableMetadata_in'
+sample_metadata_path '$sampleMetadata_in'
+ksamples '$ksamples'
+kfeatures '$kfeatures'
+iter_max '$iter_max'
+nstart '$nstart'
+algorithm '$algorithm'
+scores_out '$scores_out'
+sampleMetadata_out '$sampleMetadata_out'
+variableMetadata_out '$variableMetadata_out'
+slots "\${GALAXY_SLOTS:-1}"
+; rc=\$? ; echo "exit code \$rc" ; exit \$rc
+]]></command>
+<inputs>
+  <!-- The three input tables; all are tab-separated. -->
+  <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="variable x sample, decimal: '.', missing: NA, mode: numerical, separator: tab" />
+  <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="sample x metadata columns, separator: tab" />
+  <param name="variableMetadata_in" label="Variable metadata file" type="data" format="tabular" help="variable x metadata columns, separator: tab" />
+  <!-- K values stay free text because a comma-separated list of Ks is accepted. -->
+  <param name="ksamples" label="K value(s) for samples" type="text" value="0" help="[ksamples] Single K or comma-separated Ks for samples, or 0 for none." />
+  <param name="kfeatures" label="K value(s) for features" type="text" value="0" help="[kfeatures] Single K or comma-separated Ks for features (variables), or 0 for none." />
+  <!-- Single integer counts: typed as integer so non-numeric or non-positive input is rejected in the form. -->
+  <param name="iter_max" label="Max number of iterations" type="integer" value="10" min="1" help="[iter_max] The maximum number of iterations allowed; default 10." />
+  <param name="nstart" label="Number of random sets" type="integer" value="1" min="1" help="[nstart] How many random sets should be chosen; default 1." />
+  <!-- The default algorithm is marked with selected="true" on its option. -->
+  <param name="algorithm" label="Algorithm for clustering" type="select" help="[algorithm] K-means clustering algorithm, default 'Hartigan-Wong'; alternatives 'Lloyd', 'MacQueen'; 'Forgy' is a synonym for 'Lloyd', see references for further info.">
+    <option value="Forgy">Forgy</option>
+    <option value="Hartigan-Wong" selected="true">Hartigan-Wong</option>
+    <option value="Lloyd">Lloyd</option>
+    <option value="MacQueen">MacQueen</option>
+  </param>
+</inputs>
+<!-- Three tabular outputs, each labelled with the tool name plus the name of the matching input dataset. -->
+<outputs>
+  <data format="tabular" name="sampleMetadata_out" label="${tool.name}_${sampleMetadata_in.name}"/>
+  <data format="tabular" name="variableMetadata_out" label="${tool.name}_${variableMetadata_in.name}"/>
+  <data format="tabular" name="scores_out" label="${tool.name}_${dataMatrix_in.name}.kmeans"/>
+</outputs>
+<tests>
+  <!-- Clusters samples with K = 3,4 and features with K = 5,6,7, then checks the scores output for the expected text. -->
+  <test>
+    <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
+    <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
+    <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
+    <param name="ksamples" value="3,4"/>
+    <param name="kfeatures" value="5,6,7"/>
+    <param name="iter_max" value="10"/>
+    <param name="nstart" value="1"/>
+    <param name="algorithm" value="Hartigan-Wong"/>
+    <output name="scores_out">
+      <assert_contents>
+        <has_text text="proportion"/>
+        <has_text text="0.87482"/>
+        <has_text text="0.89248"/>
+        <has_text text="0.95355"/>
+        <has_text text="0.95673"/>
+        <has_text text="0.95963"/>
+      </assert_contents>
+    </output>
+  </test>
+</tests>
+<help>
+<![CDATA[
+**Author** - Arthur Eschenlauer (University of Minnesota, esch0041@umn.edu)
+---------------------------------------------------------------------------
+**Source** - The source code for the w4mkmeans tool is available (from the Hegeman lab github repository) at https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper
+**R code used** - The R code invoked by this wrapper is the 'kmeans' function from the R 'stats' package
+----------------------------------------------------------------------------------------------------------------------------------------------------------------------
+**Tool updates**
+See the **NEWS** section at the bottom of this page
+---------------------------------------------------
+===========================
+K-means for W4M data matrix
+===========================
+-----------
+Description
+-----------
+Calculate K-means for sample-clusters (or feature-clusters, or both) using W4M dataMatrix (i.e., XCMS-preprocessed data files) as input.
+*Please note that XCMS refers to features as 'variables'.  This documentation does not use either term consistently.*
+-----------------
+Workflow Position
+-----------------
+- Tool category: Statistical Analysis
+- Upstream tool category: Preprocessing
+- Downstream tool categories: Statistical Analysis
+----------
+Motivation
+----------
+This tool clusters samples, features (variables), or both from the W4M dataMatrix and writes the results to new columns in sampleMetadata, variableMetadata, or both, respectively.
+- If several, comma-separated K's are supplied, then one column is added for each K.
+- This clustering is **not** hierarchical; each member of a cluster is not a member of any other cluster.
+- For feature-clustering, each feature is assigned to a cluster such that the feature's response for all samples is closer to the mean of all features for that cluster than to the mean for any other cluster.
+- For sample-clustering, each sample is assigned to a cluster such that the sample's response for all features is closer to the mean of all samples for that cluster than to the mean for any other cluster.
+-----------
+Input files
+-----------
++--------------------------------------------+------------+
+| File                                       |   Format   |
++============================================+============+
+|     Data matrix                            |   tabular  |
++--------------------------------------------+------------+
+|     Sample metadata                        |   tabular  |
++--------------------------------------------+------------+
+|     Variable (i.e., feature) metadata      |   tabular  |
++--------------------------------------------+------------+
+----------
+Parameters
+----------
+**Data matrix** - input-file dataset
+- XCMS variable x sample 'dataMatrix' (tabular separated values) file of the numeric data matrix, with . as decimal, and NA for missing values; the table must not contain metadata apart from row and column names; the row and column names must be identical to the rownames of the feature and sample metadata, respectively (see below)
+**Sample metadata** - input-file dataset
+- XCMS sample x metadata 'sampleMetadata' (tabular separated values) file of the numeric and/or character sample metadata, with . as decimal and NA for missing values
+**Feature metadata** - input-file dataset
+- XCMS variable x metadata 'variableMetadata' (tabular separated values) file of the numeric and/or character feature metadata, with . as decimal and NA for missing values
+**kfeatures** - K or K's for features (default = 0)
+- integer or comma-separated integers ; zero (the default) or less will result in no calculation.
+**ksamples** - K or K's for samples (default = 0)
+- integer or comma-separated integers ; zero (the default) or less will result in no calculation.
+**iter_max** - maximum_iterations (default = 10)
+- maximum number of iterations per calculation (see https://stat.ethz.ch/R-manual/R-devel/library/stats/html/kmeans.html).
+**nstart** - how many random sets should be chosen (default = 1)
+- number of random sets of initial cluster centers to try per calculation (see https://stat.ethz.ch/R-manual/R-devel/library/stats/html/kmeans.html).
+------------
+Output files
+------------
+**XCMS sampleMetadata** - (tabular separated values) file identical to the Sample metadata file given as an input argument, excepting one column added for each K
+- **k#** - cluster number for clustering samples with K = #
+**XCMS variableMetadata** - (tabular separated values) file identical to the Feature metadata file given as an input argument, excepting one column added for each K
+- **k#** - cluster number for clustering features with K = #
+**scores** - (tabular separated values) file with one line for each K.
+- **clusterOn** - what was clustered - either 'sample' or 'feature'
+- **k** - the chosen K for clustering
+- **totalSS** - total (*between-cluster* plus total of *within-cluster*) sum of squares
+- **betweenSS** - *between-cluster* sum of squares
+- **proportion** - betweenSS / totalSS
+---------------
+Working example
+---------------
+**Input files**
++-------------------+-------------------------------------------------------------------------------------------------------------------+
+| Input File        | Download from URL                                                                                                 |
++===================+===================================================================================================================+
+| Data matrix       | https://raw.githubusercontent.com/HegemanLab/w4mkmeans_galaxy_wrapper/master/test-data/input_dataMatrix.tsv       |
++-------------------+-------------------------------------------------------------------------------------------------------------------+
+| Sample metadata   | https://raw.githubusercontent.com/HegemanLab/w4mkmeans_galaxy_wrapper/master/test-data/input_sampleMetadata.tsv   |
++-------------------+-------------------------------------------------------------------------------------------------------------------+
+| Feature metadata  | https://raw.githubusercontent.com/HegemanLab/w4mkmeans_galaxy_wrapper/master/test-data/input_variableMetadata.tsv |
++-------------------+-------------------------------------------------------------------------------------------------------------------+
+**Other input parameters**
++-----------------+---------------+
+| Input Parameter | Value         |
++=================+===============+
+| ksamples        | 3,4           |
++-----------------+---------------+
+| kfeatures       | 5,6,7         |
++-----------------+---------------+
+| iter_max        | 10            |
++-----------------+---------------+
+| nstart          | 1             |
++-----------------+---------------+
+| algorithm       | Hartigan-Wong |
++-----------------+---------------+
+----
+NEWS
+----
+August 2017, Version 0.98.1 - First release
+---------
+Citations
+---------
+]]>
+</help>
+<citations>
+<!-- R documentation for stats::kmeans -->
+<citation type="bibtex"><![CDATA[
+@incollection{RCoreTeam2017,
+title = {stats::kmeans - K-Means Clustering},
+booktitle = {R: A Language and Environment for Statistical Computing},
+author = {{R Core Team}},
+publisher = {R Foundation for Statistical Computing},
+address = {Vienna, Austria},
+year = {2017},
+url = {https://stat.ethz.ch/R-manual/R-devel/library/stats/html/kmeans.html},
+}
+]]></citation>
+<!-- Forgy algorithm -->
+<citation type="bibtex"><![CDATA[
+@article{forgy65,
+added-at = {2006-03-23T12:22:43.000+0100},
+author = {Forgy, E.},
+biburl = {https://www.bibsonomy.org/bibtex/21e31409932ce91df646c4731350e1207/hotho},
+interhash = {c86383cba8cfe00d5e6ef200016aca3f},
+intrahash = {1e31409932ce91df646c4731350e1207},
+journal = {Biometrics},
+keywords = {clustering kmeans},
+number = 3,
+pages = {768-769},
+timestamp = {2006-03-23T12:22:43.000+0100},
+title = {Cluster Analysis of Multivariate Data: Efficiency versus Interpretability of Classification},
+volume = 21,
+year = 1965
+}
+]]></citation>
+<!-- W4M 3.0 - Guitton et al. 2017-->
+<citation type="doi">10.1016/j.biocel.2017.07.002</citation>
+<!-- W4M 2.5 - Giacomini et al. 2014 -->
+<citation type="doi">10.1093/bioinformatics/btu813</citation>
+<!-- Hartigan and Wong algorithm -->
+<citation type="bibtex"><![CDATA[
+@article{Hartigan79,
+added-at = {2007-02-27T16:22:09.000+0100},
+author = {Hartigan, J. and Wong, M.},
+biburl = {https://www.bibsonomy.org/bibtex/23d8bfc440c5725783876929c022f67ce/pierpaolo.pk81},
+description = {WSD},
+interhash = {10d6d33920d9af578a4d0a556dc1477d},
+intrahash = {3d8bfc440c5725783876929c022f67ce},
+journal = {Applied Statistics},
+keywords = {imported},
+pages = {100-108},
+timestamp = {2007-02-27T16:22:11.000+0100},
+title = {Algorithm AS136: A k-means clustering algorithm},
+volume = 28,
+year = 1979
+}
+]]></citation>
+<!-- Lloyd algorithm -->
+<citation type="doi">10.1109/TIT.1982.1056489</citation>
+<!-- MacQueen algorithm -->
+<citation type="bibtex"><![CDATA[
+@inproceedings{MacQueen1967,
+added-at = {2011-01-11T13:35:01.000+0100},
+author = {MacQueen, J. B.},
+biburl = {https://www.bibsonomy.org/bibtex/25dcdb8cd9fba78e0e791af619d61d66d/enitsirhc},
+booktitle = {Proc. of the fifth Berkeley Symposium on Mathematical Statistics and Probability},
+editor = {Cam, L. M. Le and Neyman, J.},
+interhash = {8d7d4dfe7d3a06b8c9c3c2bb7aa91e28},
+intrahash = {5dcdb8cd9fba78e0e791af619d61d66d},
+keywords = {kmeans clustering},
+pages = {281-297},
+publisher = {University of California Press},
+timestamp = {2011-01-11T13:35:01.000+0100},
+title = {Some Methods for Classification and Analysis of MultiVariate Observations},
+volume = 1,
+year = 1967
+}
+]]></citation>
+</citations>
+<!--
+vim:et:sw=2:ts=2:
+--> </tool>

Mercurial > repos > eschen42 > w4mkmeans

comparison w4mkmeans.xml @ 0:6ccbe18131a6 draft