secimtools: svm_classifier.xml comparison

comparison svm_classifier.xml @ 1:2e7d47c0b027 draft

"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"

author	malex
date	Mon, 08 Mar 2021 22:04:06 +0000
parents
children	caba07f41453

comparison

equal deleted inserted replaced

-:b54326490b4d
+:2e7d47c0b027
+<tool id="secimtools_svm_classifier" name="Support Vector Machine (SVM) Classifier" version="@WRAPPER_VERSION@">
+<description>- Predict sample groups.</description>
+<macros>
+<import>macros.xml</import>
+</macros>
+<expand macro="requirements" />
+<!--
+Command line for svm_classifier.py. Each option below is filled from the input
+parameter or output dataset of the same name (the ID option is filled from the
+uniqID parameter). Every substituted value is single-quoted so that an empty
+value or one containing whitespace still reaches the script as exactly one
+argument instead of shifting the remaining options.
+-->
+<command><![CDATA[
+svm_classifier.py
+--train_wide '$train_wide'
+--train_design '$train_design'
+--test_wide '$test_wide'
+--test_design '$test_design'
+--group '$group'
+--ID '$uniqID'
+--kernel '$kernel'
+--degree '$degree'
+--C '$C'
+--cross_validation '$cross_validation'
+--C_lower_bound '$C_lower_bound'
+--C_upper_bound '$C_upper_bound'
+--a '$a'
+--b '$b'
+--outClassification         '$outClassification'
+--outClassificationAccuracy '$outClassificationAccuracy'
+--outPrediction             '$outPrediction'
+--outPredictionAccuracy     '$outPredictionAccuracy'
+]]></command>
+<inputs>
+<!-- Training and target data: one wide dataset plus one design file each. -->
+<param name="train_wide" type="data" format="tabular" label="Training wide dataset" help="Dataset missing? See TIP below."/>
+<param name="train_design" type="data" format="tabular" label="Training design file" help="Dataset missing? See TIP below."/>
+<param name="test_wide" type="data" format="tabular" label="Target wide dataset" help="Dataset missing? See TIP below."/>
+<param name="test_design" type="data" format="tabular" label="Target design file" help="Dataset missing? See TIP below."/>
+<!-- Column names used to locate the group labels and the feature identifiers. -->
+<param name="group" type="text" value="" label="Group/Treatment" help="Name of the column in your Training and Target design files that contain group classifications."/>
+<param name="uniqID" type="text" value="" label="Unique Feature ID" help="Name of the column in your Training and Target wide datasets that contain unique identifiers."/>
+<!-- Select defaults are declared with selected="true" on the option; a value attribute on a select param is not how Galaxy picks the default. -->
+<param name="kernel" type="select" display="radio" label="Select a SVM Kernel Function">
+<option value="rbf" selected="true">Radial Basis function (Gaussian)</option>
+<option value="linear">Linear</option>
+<option value="poly">Polynomial</option>
+<option value="sigmoid">Sigmoid</option>
+</param>
+<param name="degree" type="text" value="3" label="Polynomial Degree" help="Only used for the polynomial kernel."/>
+<param name="cross_validation" type="select" display="radio" label="Select Cross-Validation">
+<option value="none">None</option>
+<option value="single">Single</option>
+<option value="double" selected="true">Double</option>
+</param>
+<!-- C is used without cross-validation; the lower/upper bounds are used with it. -->
+<param name="C" type="text" value="1" label="Regularization Parameter C" help="See references in tool description for setting this parameter. Value must be positive (C > 0). Used only if cross-validation is not selected. Default = 1."/>
+<param name="C_lower_bound" type="text" value="0.1" label="Regularization Parameter C (Lower Bound)" help="Defines the lower bound for regularization parameter C when cross-validation is used. Must have a positive value (C > 0). Default = 0.1."/>
+<param name="C_upper_bound" type="text" value="10" label="Regularization Parameter C (Upper Bound)" help="Defines the upper bound for regularization parameter C when cross-validation is used. Must have a positive value that is larger than the lower bound. Default = 10."/>
+<!-- Kernel coefficients. -->
+<param name="a" type="text" value="0.0" label="Coefficient A" help="Used in the kernel functions above.  Must be greater than zero. However, the default = 0 and translates to a = 1/n_features, where n_features is the number of features. Default = 0."/>
+<param name="b" type="text" value="0.0" label="Coefficient B" help="Independent term in kernel function. It is only significant in polynomial and sigmoid kernels. Default = 0."/>
+</inputs>
+<outputs>
+<!-- Training dataset results: per-sample classification and its accuracy. -->
+<data name="outClassification" format="tabular" label="${tool.name} on ${on_string}: Classification of the Training Data Set"/>
+<data name="outClassificationAccuracy" format="tabular" label="${tool.name} on ${on_string}: Classification Accuracy of the Training Data Set"/>
+<!-- Target dataset results: per-sample prediction and its accuracy. -->
+<data name="outPrediction" format="tabular" label="${tool.name} on ${on_string}: Prediction of the Target Data Set"/>
+<data name="outPredictionAccuracy" format="tabular" label="${tool.name} on ${on_string}: Prediction Accuracy of the Target Data Set"/>
+</outputs>
+<tests>
+<!-- Single test case: the same ST000006 wide/design files serve as both training and target data, using a linear kernel with no cross-validation. -->
+<test>
+<param name="train_wide" value="ST000006_data.tsv"/>
+<param name="train_design" value="ST000006_design.tsv"/>
+<param name="test_wide" value="ST000006_data.tsv"/>
+<param name="test_design" value="ST000006_design.tsv"/>
+<param name="group" value="White_wine_type_and_source"/>
+<param name="uniqID" value="Retention_Index"/>
+<param name="kernel" value="linear"/>
+<param name="degree" value="3"/>
+<param name="cross_validation" value="none"/>
+<param name="C" value="1"/>
+<param name="C_lower_bound" value="0.1"/>
+<param name="C_upper_bound" value="2"/>
+<param name="a" value="1"/>
+<param name="b" value="1"/>
+<!-- Expected files for the four outputs. -->
+<output name="outClassification" file="ST000006_svm_classifier_train_classification.tsv"/>
+<output name="outClassificationAccuracy" file="ST000006_svm_classifier_train_classification_accuracy.tsv"/>
+<output name="outPrediction" file="ST000006_svm_classifier_target_classification.tsv"/>
+<output name="outPredictionAccuracy" file="ST000006_svm_classifier_target_classification_accuracy.tsv"/>
+</test>
+</tests>
+<help><![CDATA[
+**TIP:**
+If your data is not TAB delimited, use *Text Manipulation->Convert*.
+**WARNINGS:**
+- (1) This script automatically removes spaces and special characters from strings.
+- (2) If a feature name starts with a number it will prepend an '_'.
+**Tool Description**
+**NOTE: A minimum of 100 samples is required by the tool for single or double cross validation**
+Given a set of supervised samples in a Training Dataset, the SVM training algorithm builds a model based on these samples that can be used for predicting the categories of new, unclassified samples in a Target Dataset.
+The Target Dataset is not used for model training or evaluation, only for prediction based on the finalized model.
+SVM classification is performed on the target data and accuracy is estimated for both Target and Training Datasets.
+SVM uses different kernel functions to carry out different types of classification such as radial basis (Gaussian), linear, polynomial, and sigmoid.
+The classification model can be trained with and without cross-validation (single or double).
+For single and double cross-validation: the training dataset is split differently when the model fit is performed.
+In single cross-validation: the same data are used to both fit and evaluate the model.
+In double cross-validation: the training dataset is split into pieces and the model fit is performed on one of the pieces and evaluated on the other pieces.
+Without cross-validation, the user specifies Regularization Parameter C directly; under cross-validation, the user specifies the Lower and Upper bounds of Regularization Parameter C.
+For more information about Regularization Parameter C, see references below:
+Cortes, C. and Vapnik, V. 1995.  Support-vector networks. Machine Learning. 20(3) 273-297.
+Steinwart, I and Christmann, A.  2008.  Support vector machines. Springer Science and Business Media.
+To use the SVM tool, users need the following information:
+(i) a Training Dataset with known categories in the training design file and
+(ii) a Target Dataset with predicted categories in the target design file.
+(iii) the name of the Group/Treatment classification column should be the same for both design files.
+(iv) the Unique Feature IDs should be the same in both the wide datasets.
+(v) the number of Unique Feature IDs should be the same in both the wide datasets.
+------------------------------------------------------------------------------
+**Input**
+- Four input datasets are required.
+@WIDE@
+**NOTE:** The sample IDs must match the sample IDs in the Design File
+(below). Extra columns will automatically be ignored.
+@METADATA@
+@GROUP@
+@UNIQID@
+**SVM Kernel Function**
+- Kernel functions available for the SVM algorithm.
+**Polynomial Degree**
+- Only used for the polynomial kernel.
+**Cross-Validation Choice**
+- Cross-validation options available for the user. 'None' corresponds to no cross-validation- the user specifies regularization parameter C manually.
+**Regularization Parameter C**
+- Penalizes potential overfitting, must be positive.
+**Regularization Parameter C (Lower Bound)**
+- Lower bound for regularization parameter C. Value must be greater than 0. Used only if cross-validation is selected.
+**Regularization Parameter C (Upper Bound)**
+- Upper bound for regularization parameter C. Value must be greater than the Lower Bound.
+**Coefficient A**
+- Used in the kernel functions above.  Must be greater than zero. Default = 0, however,
+this translates to a = 1/n_features, where n_features is the number of features.
+**Coefficient B**
+- Independent term in the kernel function. It is only significant in
+polynomial and sigmoid kernels.
+------------------------------------------------------------------------------
+**Output**
+This tool will output two files for the Training dataset and two for the Target dataset:
+Training:
+(1) a TSV file containing the observed and predicted grouping classifications for each sample and
+(2) a TSV file containing the accuracy (percentage) of the classification.
+Target:
+(3) a TSV file containing suspected and predicted grouping classifications for each sample and
+(4) a TSV file containing the accuracy (percentage) of the prediction in comparison to the suspected grouping specified in the design file.
+**NOTE:** Some knowledge about the SVM classifier algorithm and different kernel types is recommended for users who plan to use the tool frequently with different settings.
+]]></help>
+<expand macro="citations"/>
+</tool>

Mercurial > repos > malex > secimtools

comparison svm_classifier.xml @ 1:2e7d47c0b027 draft