Mercurial > repos > bgruening > sklearn_feature_selection

diff main_macros.xml @ 0:092199a095dd draft
planemo upload for repository https://github.com/bgruening/galaxytools/tools/sklearn commit 7a31960686122d7e53054fef4996525f04ebd254
author: bgruening
date: Thu, 12 Apr 2018 08:23:30 -0400
children: 58322d3c7bd3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main_macros.xml	Thu Apr 12 08:23:30 2018 -0400
@@ -0,0 +1,892 @@
+<macros>
+  <token name="@VERSION@">0.9</token>
+
+  <!-- Python snippet exposed as a token: reads file f with pandas and returns the columns listed in c (comma-separated, 1-based) as an array of values. -->
+  <token name="@COLUMNS_FUNCTION@">
+def read_columns(f, c, **args):
+  frame = pandas.read_csv(f, **args)
+  positions = [int(col) - 1 for col in c.split(',')]
+  return frame.iloc[:, positions].values
+  </token>
+
+  <!-- Package requirements shared by the tools; callers can add further requirements through the yield. -->
+  <xml name="python_requirements">
+      <requirements>
+          <requirement version="2.7" type="package">python</requirement>
+          <requirement version="0.19.1" type="package">scikit-learn</requirement>
+          <requirement version="0.22.0" type="package">pandas</requirement>
+          <yield/>
+      </requirements>
+  </xml>
+
+  <!-- Any exit code of 1 or above is reported as a fatal error. -->
+  <xml name="macro_stdio">
+    <stdio>
+        <exit_code level="fatal" range="1:" description="Error occurred. Please check Tool Standard Error"/>
+    </stdio>
+  </xml>
+
+
+  <!--Generic interface-->
+  <!-- Train-or-load switch. "train" asks for a training dataset and nests the caller's content (yield) in the selected_algorithms conditional; "load" asks for a saved model, a dataset and a prediction type. -->
+  <xml name="train_loadConditional" token_train="tabular" token_data="tabular" token_model="txt">
+    <conditional name="selected_tasks">
+        <param type="select" name="selected_task" label="Select a Classification Task">
+            <option selected="true" value="train">Train a model</option>
+            <option value="load">Load a model and predict</option>
+        </param>
+        <when value="load">
+            <param type="data" name="infile_model" format="@MODEL@" label="Models" help="Select a model file."/>
+            <param type="data" name="infile_data" format="@DATA@" label="Data (tabular)" help="Select the dataset you want to classify."/>
+            <conditional name="prediction_options">
+                <param type="select" name="prediction_option" label="Select the type of prediction">
+                    <option value="predict">Predict class labels</option>
+                    <option value="advanced">Include advanced options</option>
+                </param>
+                <when value="predict"/>
+                <when value="advanced"/>
+            </conditional>
+        </when>
+        <when value="train">
+            <param type="data" name="infile_train" format="@TRAIN@" label="Training samples (tabular)"/>
+            <conditional name="selected_algorithms">
+                <yield/>
+            </conditional>
+        </when>
+    </conditional>
+  </xml>
+
+  <!-- Train-or-load switch without a training-dataset input. The "load" branch also asks whether the dataset has a header; the "train" branch only nests the caller's content (yield). -->
+  <xml name="sl_Conditional" token_train="tabular" token_data="tabular" token_model="txt">
+    <conditional name="selected_tasks">
+        <param type="select" name="selected_task" label="Select a Classification Task">
+            <option selected="true" value="train">Train a model</option>
+            <option value="load">Load a model and predict</option>
+        </param>
+        <when value="load">
+            <param type="data" name="infile_model" format="@MODEL@" label="Models" help="Select a model file."/>
+            <param type="data" name="infile_data" format="@DATA@" label="Data (tabular)" help="Select the dataset you want to classify."/>
+            <param type="boolean" name="header" optional="True" checked="False" truevalue="booltrue" falsevalue="boolfalse" label="Does the dataset contain header:"/>
+            <conditional name="prediction_options">
+                <param type="select" name="prediction_option" label="Select the type of prediction">
+                    <option value="predict">Predict class labels</option>
+                    <option value="advanced">Include advanced options</option>
+                </param>
+                <when value="predict"/>
+                <when value="advanced"/>
+            </conditional>
+        </when>
+        <when value="train">
+            <conditional name="selected_algorithms">
+                <yield/>
+            </conditional>
+        </when>
+    </conditional>
+  </xml>
+
+  <!-- Collapsed "Advanced Options" section wrapping whatever the caller yields. -->
+  <xml name="advanced_section">
+    <section title="Advanced Options" name="options" expanded="False">
+      <yield/>
+    </section>
+  </xml>
+
+
+  <!--Generalized Linear Models-->
+  <!-- Loss-function selector. The select token sets whether squared_loss is preselected; extra options can be yielded in. -->
+  <xml name="loss" token_help=" " token_select="false">
+    <param type="select" argument="loss" label="Loss function" help="@HELP@">
+        <option selected="@SELECT@" value="squared_loss">squared loss</option>
+        <option value="huber">huber</option>
+        <option value="epsilon_insensitive">epsilon insensitive</option>
+        <option value="squared_epsilon_insensitive">squared epsilon insensitive</option>
+        <yield/>
+    </param>
+  </xml>
+
+  <!-- Regularization-penalty selector with l2 preselected; extra options can be yielded in. -->
+  <xml name="penalty" token_help=" ">
+    <param type="select" argument="penalty" label="Penalty (regularization term)" help="@HELP@">
+        <option selected="true" value="l2">l2</option>
+        <option value="l1">l1</option>
+        <option value="elasticnet">elastic net</option>
+        <option value="none">none</option>
+        <yield/>
+    </param>
+  </xml>
+
+  <!-- Float input for the elastic-net mixing parameter. -->
+  <xml name="l1_ratio" token_default_value="0.15" token_help=" ">
+    <param type="float" argument="l1_ratio" label="Elastic Net mixing parameter" value="@DEFAULT_VALUE@" help="@HELP@"/>
+  </xml>
+
+  <!-- Float input for epsilon. The label now names the epsilon-insensitive losses correctly (it previously read "epsilon-sensitive"). -->
+  <xml name="epsilon" token_default_value="0.1" token_help="Used if loss is ‘huber’, ‘epsilon_insensitive’, or ‘squared_epsilon_insensitive’. ">
+    <param argument="epsilon" type="float" value="@DEFAULT_VALUE@" label="Epsilon (epsilon-insensitive loss functions only)" help="@HELP@"/>
+  </xml>
+
+  <!-- Optional learning-rate schedule selector. The selected1 and selected2 tokens preselect "optimal" or "invscaling" respectively. -->
+  <xml name="learning_rate_s" token_help=" " token_selected1="false" token_selected2="false">
+    <param type="select" argument="learning_rate" optional="true" label="Learning rate schedule" help="@HELP@">
+        <option selected="@SELECTED1@" value="optimal">optimal</option>
+        <option value="constant">constant</option>
+        <option selected="@SELECTED2@" value="invscaling">inverse scaling</option>
+        <yield/>
+    </param>
+  </xml>
+
+  <!-- Float input for the initial learning rate. -->
+  <xml name="eta0" token_default_value="0.0" token_help="Used with ‘constant’ or ‘invscaling’ schedules. ">
+    <param type="float" argument="eta0" label="Initial learning rate" value="@DEFAULT_VALUE@" help="@HELP@"/>
+  </xml>
+
+  <!-- Float input for the inverse-scaling exponent. -->
+  <xml name="power_t" token_default_value="0.5" token_help=" ">
+    <param type="float" argument="power_t" label="Exponent for inverse scaling learning rate" value="@DEFAULT_VALUE@" help="@HELP@"/>
+  </xml>
+
+  <!-- Boolean switch for sample normalization. The help attribute now uses the declared help token (default " "), so a help text passed by the caller is no longer discarded. -->
+  <xml name="normalize" token_checked="false" token_help=" ">
+    <param argument="normalize" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Normalize samples before training" help="@HELP@"/>
+  </xml>
+
+  <!-- Boolean switch for working on a copy of the samples. The former hard-coded help text is now the default of the help token, so the default rendering is unchanged while callers can override it. -->
+  <xml name="copy_X" token_checked="true" token_help="If false, samples would be overwritten. ">
+    <param argument="copy_X" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Use a copy of samples" help="@HELP@"/>
+  </xml>
+
+  <!-- Parameter bundle built from the shared macros plus a solver selector (auto preselected). -->
+  <xml name="ridge_params">
+    <expand macro="normalize"/>
+    <expand macro="alpha" default_value="1.0"/>
+    <expand macro="fit_intercept"/>
+    <expand macro="max_iter" default_value=""/>
+    <expand macro="tol" default_value="0.001" help_text="Precision of the solution. "/>
+    <!--class_weight-->
+    <expand macro="copy_X"/>
+    <param type="select" argument="solver" value="" label="Solver to use in the computational routines" help=" ">
+        <option selected="true" value="auto">auto</option>
+        <option value="svd">svd</option>
+        <option value="cholesky">cholesky</option>
+        <option value="lsqr">lsqr</option>
+        <option value="sparse_cg">sparse_cg</option>
+        <option value="sag">sag</option>
+    </param>
+    <expand macro="random_state"/>
+  </xml>
+
+  <!--Ensemble methods-->
+  <!-- Optional integer input for the number of trees. -->
+  <xml name="n_estimators" token_default_value="10" token_help=" ">
+    <param type="integer" argument="n_estimators" optional="true" label="Number of trees in the forest" value="@DEFAULT_VALUE@" help="@HELP@"/>
+  </xml>
+
+  <!-- Optional integer input for the maximum tree depth; empty by default. -->
+  <xml name="max_depth" token_default_value="" token_help=" ">
+    <param type="integer" argument="max_depth" optional="true" label="Maximum depth of the tree" value="@DEFAULT_VALUE@" help="@HELP@"/>
+  </xml>
+
+  <!-- Optional input for the split threshold; the param type itself is a token (integer by default). -->
+  <xml name="min_samples_split" token_type="integer" token_default_value="2" token_help=" ">
+    <param type="@TYPE@" argument="min_samples_split" optional="true" label="Minimum number of samples required to split an internal node" value="@DEFAULT_VALUE@" help="@HELP@"/>
+  </xml>
+
+  <!-- Optional input for the minimum leaf size; type, default, label and help are all tokens. -->
+  <xml name="min_samples_leaf" token_type="integer" token_default_value="1" token_label="Minimum number of samples in newly created leaves" token_help=" ">
+    <param type="@TYPE@" argument="min_samples_leaf" optional="true" label="@LABEL@" value="@DEFAULT_VALUE@" help="@HELP@"/>
+  </xml>
+
+  <!-- Optional float input for the minimum weighted fraction of samples at a leaf. -->
+  <xml name="min_weight_fraction_leaf" token_default_value="0.0" token_help=" ">
+    <param type="float" argument="min_weight_fraction_leaf" optional="true" label="Minimum weighted fraction of the input samples required to be at a leaf node" value="@DEFAULT_VALUE@" help="@HELP@"/>
+  </xml>
+
+  <!-- Optional integer input for the maximum number of leaf nodes; empty by default. -->
+  <xml name="max_leaf_nodes" token_default_value="" token_help=" ">
+    <param type="integer" argument="max_leaf_nodes" optional="true" label="Maximum number of leaf nodes in best-first method" value="@DEFAULT_VALUE@" help="@HELP@"/>
+  </xml>
+
+  <!-- Optional float input for the impurity threshold that stops node splitting. -->
+  <xml name="min_impurity_decrease" token_default_value="0" token_help=" ">
+    <param type="float" argument="min_impurity_decrease" optional="true" label="The threshold value of impurity for stopping node splitting" value="@DEFAULT_VALUE@" help="@HELP@"/>
+  </xml>
+
+  <!-- Boolean switch for bootstrap sampling. The unchecked value is "boolfalse", matching every other boolean in this file (it was misspelled "boolflase"). -->
+  <xml name="bootstrap" token_checked="true" token_help=" ">
+    <param argument="bootstrap" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Use bootstrap samples for building trees." help="@HELP@"/>
+  </xml>
+
+  <!-- Split-quality selector (gini preselected); extra options can be yielded in. The help attribute now uses the declared help token (default " ") instead of a hard-coded blank. -->
+  <xml name="criterion" token_help=" ">
+    <param argument="criterion" type="select" label="Function to measure the quality of a split"  help="@HELP@">
+        <option value="gini" selected="true">Gini impurity</option>
+        <option value="entropy">Information gain</option>
+        <yield/>
+    </param>
+  </xml>
+
+  <!-- Split-quality selector offering mse and mae; extra options can be yielded in. The declared help token (default empty) is now wired to the help attribute so callers can supply a help text. -->
+  <xml name="criterion2" token_help="">
+    <param argument="criterion" type="select" label="Function to measure the quality of a split" help="@HELP@">
+      <option value="mse">mse - mean squared error</option>
+      <option value="mae">mae - mean absolute error</option>
+      <yield/>
+    </param>
+  </xml>
+
+  <!-- Boolean switch for out-of-bag error estimation. -->
+  <xml name="oob_score" token_checked="false" token_help=" ">
+    <param type="boolean" argument="oob_score" optional="true" checked="@CHECKED@" truevalue="booltrue" falsevalue="boolfalse" label="Use out-of-bag samples to estimate the generalization error" help="@HELP@"/>
+  </xml>
+
+  <!-- max_features chooser: three named strategies, or "number_input", which reveals an optional float field. -->
+  <xml name="max_features">
+    <conditional name="select_max_features">
+      <param type="select" argument="max_features" label="max_features">
+        <option selected="true" value="auto">auto - max_features=n_features</option>
+        <option value="sqrt">sqrt - max_features=sqrt(n_features)</option>
+        <option value="log2">log2 - max_features=log2(n_features)</option>
+        <option value="number_input">I want to type the number in or input None type</option>
+      </param>
+      <when value="auto"/>
+      <when value="sqrt"/>
+      <when value="log2"/>
+      <when value="number_input">
+        <param type="float" name="num_max_features" optional="true" value="" label="Input max_features number:" help="If int, consider the number of features at each split; If float, then max_features is a percentage and int(max_features * n_features) features are considered at each split."/>
+      </when>
+    </conditional>
+  </xml>
+
+  <!-- Optional integer input for the verbosity level. -->
+  <xml name="verbose" token_default_value="0" token_help="If 1 then it prints progress and performance once in a while. If greater than 1 then it prints progress and performance for every tree.">
+    <param type="integer" argument="verbose" optional="true" label="Enable verbose output" value="@DEFAULT_VALUE@" help="@HELP@"/>
+  </xml>
+
+  <!-- Optional float input for the learning rate. -->
+  <xml name="learning_rate" token_default_value="1.0" token_help=" ">
+    <param type="float" argument="learning_rate" optional="true" label="Learning rate" value="@DEFAULT_VALUE@" help="@HELP@"/>
+  </xml>
+
+  <!-- Optional float input for the subsample fraction; the default of 1.0 is fixed, not a token. -->
+  <xml name="subsample" token_help=" ">
+    <param type="float" argument="subsample" optional="true" label="The fraction of samples to be used for fitting the individual base learners" value="1.0" help="@HELP@"/>
+  </xml>
+
+  <!-- Three-way presort selector with auto preselected. -->
+  <xml name="presort">
+    <param type="select" argument="presort" label="Whether to presort the data to speed up the finding of best splits in fitting">
+      <option selected="true" value="auto">auto</option>
+      <option value="true">true</option>
+      <option value="false">false</option>
+    </param>
+  </xml>
+
+  <!--Parameters-->
+  <!-- Optional float input for the tolerance; default value and help text are tokens. -->
+  <xml name="tol" token_default_value="0.0" token_help_text="Early stopping heuristics based on the relative center changes. Set to default (0.0) to disable this convergence detection.">
+    <param type="float" argument="tol" optional="true" label="Tolerance" value="@DEFAULT_VALUE@" help="@HELP_TEXT@"/>
+  </xml>
+
+  <!-- Optional integer input for the number of clusters. -->
+  <xml name="n_clusters" token_default_value="8">
+    <param type="integer" argument="n_clusters" optional="true" label="Number of clusters" value="@DEFAULT_VALUE@" help=" "/>
+  </xml>
+
+  <!-- Boolean switch for intercept estimation. -->
+  <xml name="fit_intercept" token_checked="true">
+    <param type="boolean" argument="fit_intercept" optional="true" checked="@CHECKED@" truevalue="booltrue" falsevalue="boolfalse" label="Estimate the intercept" help="If false, the data is assumed to be already centered."/>
+  </xml>
+
+  <!-- Optional integer input for the number of parallel jobs; the label is a token. -->
+  <xml name="n_jobs" token_default_value="1" token_label="The number of jobs to run in parallel for both fit and predict">
+    <param type="integer" argument="n_jobs" optional="true" label="@LABEL@" value="@DEFAULT_VALUE@" help="If -1, then the number of jobs is set to the number of cores"/>
+  </xml>
+
+  <!-- Optional integer input for the number of iterations. -->
+  <xml name="n_iter" token_default_value="5" token_help_text="The number of passes over the training data (aka epochs). ">
+    <param type="integer" argument="n_iter" optional="true" label="Number of iterations" value="@DEFAULT_VALUE@" help="@HELP_TEXT@"/>
+  </xml>
+
+  <!-- Boolean switch for shuffling; checked state, label and help are tokens. -->
+  <xml name="shuffle" token_checked="true" token_help_text=" " token_label="Shuffle data after each iteration">
+    <param type="boolean" argument="shuffle" optional="true" checked="@CHECKED@" truevalue="booltrue" falsevalue="boolfalse" label="@LABEL@" help="@HELP_TEXT@"/>
+  </xml>
+
+  <!-- Optional integer input for the random seed; empty by default. -->
+  <xml name="random_state" token_default_value="" token_help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data. A fixed seed allows reproducible results.">
+    <param type="integer" argument="random_state" optional="true" label="Random seed number" value="@DEFAULT_VALUE@" help="@HELP_TEXT@"/>
+  </xml>
+
+  <!-- Boolean switch for warm start; checked by default in this macro. -->
+  <xml name="warm_start" token_checked="true" token_help_text="When set to True, reuse the solution of the previous call to fit as initialization,otherwise, just erase the previous solution.">
+    <param type="boolean" argument="warm_start" optional="true" checked="@CHECKED@" truevalue="booltrue" falsevalue="boolfalse" label="Perform warm start" help="@HELP_TEXT@"/>
+  </xml>
+
+  <!-- Optional float input for the penalty parameter C. -->
+  <xml name="C" token_default_value="1.0" token_help_text="Penalty parameter C of the error term.">
+    <param type="float" argument="C" optional="true" label="Penalty parameter" value="@DEFAULT_VALUE@" help="@HELP_TEXT@"/>
+  </xml>
+
+  <!--xml name="class_weight" token_default_value="" token_help_text="">
+    <param argument="class_weight" type="" optional="true" value="@DEFAULT_VALUE@" label="" help="@HELP_TEXT@"/>
+  </xml-->
+
+  <!-- Optional float input for the regularization coefficient. -->
+  <xml name="alpha" token_default_value="0.0001" token_help_text="Constant that multiplies the regularization term if regularization is used. ">
+    <param type="float" argument="alpha" optional="true" label="Regularization coefficient" value="@DEFAULT_VALUE@" help="@HELP_TEXT@"/>
+  </xml>
+
+  <!-- Optional integer input for the number of samples. -->
+  <xml name="n_samples" token_default_value="100" token_help_text="The total number of points equally divided among clusters.">
+    <param type="integer" argument="n_samples" optional="true" label="Number of samples" value="@DEFAULT_VALUE@" help="@HELP_TEXT@"/>
+  </xml>
+
+  <!-- Optional integer input for the number of features. -->
+  <xml name="n_features" token_default_value="2" token_help_text="Number of different numerical properties produced for each sample.">
+    <param type="integer" argument="n_features" optional="true" label="Number of features" value="@DEFAULT_VALUE@" help="@HELP_TEXT@"/>
+  </xml>
+
+  <!-- Optional float input for the standard deviation of the added Gaussian noise. -->
+  <xml name="noise" token_default_value="0.0" token_help_text="Floating point number. ">
+    <param type="float" argument="noise" optional="true" label="Standard deviation of the Gaussian noise added to the data" value="@DEFAULT_VALUE@" help="@HELP_TEXT@"/>
+  </xml>
+
+  <!-- The "C" macro used to be declared a second time here. It is already
+       defined earlier in this file with an equivalent parameter, so the
+       redundant copy was dropped to keep a single definition per macro name. -->
+
+  <!-- Optional integer input for the iteration limit; default, label and help are tokens. -->
+  <xml name="max_iter" token_default_value="300" token_label="Maximum number of iterations per single run" token_help_text=" ">
+    <param type="integer" argument="max_iter" optional="true" label="@LABEL@" value="@DEFAULT_VALUE@" help="@HELP_TEXT@"/>
+  </xml>
+
+  <!-- Optional integer input for the number of runs with different centroid seeds. -->
+  <xml name="n_init" token_default_value="10" >
+    <param type="integer" argument="n_init" optional="true" label="Number of runs with different centroid seeds" value="@DEFAULT_VALUE@" help=" "/>
+  </xml>
+
+  <!-- Centroid-initialization selector; no option is explicitly marked as selected. -->
+  <xml name="init">
+    <param type="select" argument="init" label="Centroid initialization method" help="''k-means++'' selects initial cluster centers that speed up convergence. ''random'' chooses k observations (rows) at random from data as initial centroids.">
+      <option value="k-means++">k-means++</option>
+      <option value="random">random</option>
+    </param>
+  </xml>
+
+  <!-- Optional float input for gamma; default, label and help are tokens. -->
+  <xml name="gamma" token_default_value="1.0" token_label="Scaling parameter" token_help_text=" ">
+    <param type="float" argument="gamma" optional="true" label="@LABEL@" value="@DEFAULT_VALUE@" help="@HELP_TEXT@"/>
+  </xml>
+
+  <!-- Optional integer input for the polynomial degree; default, label and help are tokens. -->
+  <xml name="degree" token_default_value="3" token_label="Degree of the polynomial" token_help_text=" ">
+    <param type="integer" argument="degree" optional="true" label="@LABEL@" value="@DEFAULT_VALUE@" help="@HELP_TEXT@"/>
+  </xml>
+
+  <!-- Optional input for the kernel's independent term. Typed as float (was integer) so fractional values can be entered, in line with the float coef0 in this file's SVC options; the default of 1 is unchanged. -->
+  <xml name="coef0" token_default_value="1" token_label="Zero coefficient" token_help_text=" ">
+    <param argument="coef0" type="float" optional="true" value="@DEFAULT_VALUE@" label="@LABEL@" help="@HELP_TEXT@"/>
+  </xml>
+
+  <!-- Optional integer input for the positive class label; empty by default. -->
+  <xml name="pos_label" token_default_value="">
+    <param type="integer" argument="pos_label" optional="true" label="Label of the positive class" value="@DEFAULT_VALUE@" help=" "/>
+  </xml>
+
+  <!-- Optional averaging-type selector; extra options can be yielded in. -->
+  <xml name="average">
+    <param type="select" argument="average" optional="true" label="Averaging type" help=" ">
+      <option value="micro">Calculate metrics globally by counting the total true positives, false negatives and false positives. (micro)</option>
+      <option value="samples">Calculate metrics for each instance, and find their average. Only meaningful for multilabel. (samples)</option>
+      <option value="macro">Calculate metrics for each label, and find their unweighted mean. This does not take label imbalance into account. (macro)</option>
+      <option value="weighted">Calculate metrics for each label, and find their average, weighted by support (the number of true instances for each label). This alters ‘macro’ to account for label imbalance; it can result in an F-score that is not between precision and recall. (weighted)</option>
+      <option value="None">None</option>
+      <yield/>
+    </param>
+  </xml>
+
+  <!-- Float input for the F-score beta, defaulting to 1.0. -->
+  <xml name="beta">
+    <param type="float" argument="beta" label="The strength of recall versus precision in the F-score" value="1.0" help=" "/>
+  </xml>
+
+
+  <!--Data interface-->
+  <!-- Tabular dataset input with optional start and end column pickers bound to that dataset. -->
+  <xml name="tabular_input">
+    <param type="data" name="infile" format="tabular" label="Data file with numeric values"/>
+    <param type="data_column" name="start_column" data_ref="infile" optional="True" label="Select a subset of data. Start column:"/>
+    <param type="data_column" name="end_column" data_ref="infile" optional="True" label="End column:"/>
+  </xml>
+
+  <!-- Two dataset inputs, each with a column picker; formats, labels, help texts and multiplicity are tokens, and more params can be yielded in. -->
+  <xml name="sample_cols" token_label1="File containing true class labels:" token_label2="File containing predicted class labels:" token_multiple1="False" token_multiple2="False" token_format1="tabular" token_format2="tabular" token_help1="" token_help2="">
+    <param type="data" name="infile1" format="@FORMAT1@" label="@LABEL1@" help="@HELP1@"/>
+    <param type="data_column" name="col1" data_ref="infile1" multiple="@MULTIPLE1@" label="Select target column(s):"/>
+    <param type="data" name="infile2" format="@FORMAT2@" label="@LABEL2@" help="@HELP2@"/>
+    <param type="data_column" name="col2" data_ref="infile2" multiple="@MULTIPLE2@" label="Select target column(s):"/>
+    <yield/>
+  </xml>
+
+  <!-- Training-samples and class-labels datasets, each with a header flag and a column picker; more params can be yielded in. -->
+  <xml name="samples_tabular" token_multiple1="False" token_multiple2="False">
+    <param type="data" name="infile1" format="tabular" label="Training samples dataset:"/>
+    <param type="boolean" name="header1" optional="True" checked="False" truevalue="booltrue" falsevalue="boolfalse" label="Does the dataset contain header:"/>
+    <param type="data_column" name="col1" data_ref="infile1" multiple="@MULTIPLE1@" label="Select target column(s):"/>
+    <param type="data" name="infile2" format="tabular" label="Dataset containing class labels:"/>
+    <param type="boolean" name="header2" optional="True" checked="False" truevalue="booltrue" falsevalue="boolfalse" label="Does the dataset contain header:"/>
+    <param type="data_column" name="col2" data_ref="infile2" multiple="@MULTIPLE2@" label="Select target column(s):"/>
+    <yield/>
+  </xml>
+
+  <!-- True-label and predicted-label inputs, each selectable as tabular (with a column picker) or sparse. The sparse predicted-labels input now uses the label2 token; it previously reused label1 and so showed the true-labels caption. -->
+  <xml name="clf_inputs_extended" token_label1=" " token_label2=" " token_multiple="False">
+    <conditional name="true_columns">
+      <param name="selected_input1" type="select" label="Select the input type of true labels dataset:">
+          <option value="tabular" selected="true">Tabular</option>
+          <option value="sparse">Sparse</option>
+      </param>
+      <when value="tabular">
+        <param name="infile1" type="data" label="@LABEL1@"/>
+        <param name="col1" type="data_column" data_ref="infile1" label="Select the target column:"/>
+      </when>
+      <when value="sparse">
+          <param name="infile1" type="data" format="txt" label="@LABEL1@"/>
+      </when>
+    </conditional>
+    <conditional name="predicted_columns">
+      <param name="selected_input2" type="select" label="Select the input type of predicted labels dataset:">
+          <option value="tabular" selected="true">Tabular</option>
+          <option value="sparse">Sparse</option>
+      </param>
+      <when value="tabular">
+        <param name="infile2" type="data" label="@LABEL2@"/>
+        <param name="col2" multiple="@MULTIPLE@" type="data_column" data_ref="infile2" label="Select target column(s):"/>
+      </when>
+      <when value="sparse">
+          <param name="infile2" type="data" format="txt" label="@LABEL2@"/>
+      </when>
+    </conditional>
+  </xml>
+
+  <!-- Tabular true-label and predicted-value inputs with column pickers. Note that col1 multiplicity comes from the multiple1 token and col2 multiplicity from the multiple token. -->
+  <xml name="clf_inputs" token_label1="Dataset containing true labels (tabular):" token_label2="Dataset containing predicted values (tabular):" token_multiple1="False" token_multiple="False">
+    <param type="data" name="infile1" format="tabular" label="@LABEL1@"/>
+    <param type="data_column" name="col1" data_ref="infile1" multiple="@MULTIPLE1@" label="Select the target column:"/>
+    <param type="data" name="infile2" format="tabular" label="@LABEL2@"/>
+    <param type="data_column" name="col2" data_ref="infile2" multiple="@MULTIPLE@" label="Select target column(s):"/>
+  </xml>
+
+  <!-- Repeat block accepting between one and max_num input files; repeat name, format, label and help are tokens. -->
+  <xml name="multiple_input" token_name="input_files" token_max_num="10" token_format="txt" token_label="Sparse matrix file (.mtx, .txt)" token_help_text="Specify a sparse matrix file in .txt format.">
+    <repeat title="Select input file(s):" name="@NAME@" min="1" max="@MAX_NUM@">
+        <param type="data" name="input" format="@FORMAT@" label="@LABEL@" help="@HELP_TEXT@"/>
+    </repeat>
+  </xml>
+
+  <!-- Sparse-matrix input plus a labels dataset with a column picker. -->
+  <xml name="sparse_target" token_label1="Select a sparse matrix:" token_label2="Select the tabular containing true labels:" token_multiple="False" token_format1="txt" token_format2="tabular" token_help1="" token_help2="">
+    <param type="data" name="infile1" format="@FORMAT1@" label="@LABEL1@" help="@HELP1@"/>
+    <param type="data" name="infile2" format="@FORMAT2@" label="@LABEL2@" help="@HELP2@"/>
+    <param type="data_column" name="col2" data_ref="infile2" multiple="@MULTIPLE@" label="Select target column(s):"/>
+  </xml>
+
+  <!-- Input-type switch: tabular expands samples_tabular (multiple sample columns allowed), sparse expands sparse_target. -->
+  <xml name="sl_mixed_input">
+    <conditional name="input_options">
+      <param type="select" name="selected_input" label="Select input type:">
+          <option selected="true" value="tabular">tabular data</option>
+          <option value="sparse">sparse matrix</option>
+      </param>
+      <when value="tabular">
+          <expand macro="samples_tabular" multiple1="true"/>
+      </when>
+      <when value="sparse">
+          <expand macro="sparse_target"/>
+      </when>
+    </conditional>
+  </xml>
+
+  <!-- Single dataset input for transformation; format and help are tokens. Fixes the "supporetd" typo in the default help text. -->
+  <xml name="multitype_input" token_format="tabular" token_help="All datasets with tabular format are supported.">
+    <param name="infile_transform" type="data" format="@FORMAT@" label="Select a dataset to transform:" help="@HELP@"/>
+  </xml>
+
+
+  <!--Advanced options-->
+  <!-- Collapsed advanced section: caller content (yield) first, then the weight function, neighbor-search algorithm and leaf size. -->
+  <xml name="nn_advanced_options">
+    <section title="Advanced Options" name="options" expanded="False">
+      <yield/>
+      <param type="select" argument="weights" label="Weight function" help="Used in prediction.">
+          <option selected="true" value="uniform">Uniform weights. All points in each neighborhood are weighted equally. (Uniform)</option>
+          <option value="distance">Weight points by the inverse of their distance. (Distance)</option>
+      </param>
+      <param type="select" argument="algorithm" label="Neighbor selection algorithm" help=" ">
+          <option selected="true" value="auto">Auto</option>
+          <option value="ball_tree">BallTree</option>
+          <option value="kd_tree">KDTree</option>
+          <option value="brute">Brute-force</option>
+      </param>
+      <param type="integer" argument="leaf_size" label="Leaf size" value="30" help="Used with BallTree and KDTree. Affects the time and memory usage of the constructed tree."/>
+      <!--param name="metric"-->
+      <!--param name="p"-->
+      <!--param name="metric_params"-->
+    </section>
+  </xml>
+
+  <!-- Collapsed advanced section for SVC: caller content (yield) first, then kernel settings, shrinking, probability, tolerance, iteration limit and seed. Fixes the "dafault" typo in two help texts. -->
+  <xml name="svc_advanced_options">
+    <section name="options" title="Advanced Options" expanded="False">
+        <yield/>
+        <param argument="kernel" type="select" optional="true" label="Kernel type" help="Kernel type to be used in the algorithm. If none is given, ‘rbf’ will be used.">
+            <option value="rbf" selected="true">rbf</option>
+            <option value="linear">linear</option>
+            <option value="poly">poly</option>
+            <option value="sigmoid">sigmoid</option>
+            <option value="precomputed">precomputed</option>
+        </param>
+        <param argument="degree" type="integer" optional="true" value="3" label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. default : 3 "/>
+        <!--TODO: param argument="gamma" float, optional (default=’auto’) -->
+        <param argument="coef0" type="float" optional="true" value="0.0" label="Zero coefficient (polynomial and sigmoid kernels only)"
+            help="Independent term in kernel function. default: 0.0 "/>
+        <param argument="shrinking" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
+            label="Use the shrinking heuristic" help=" "/>
+        <param argument="probability" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false"
+            label="Enable probability estimates. " help="This must be enabled prior to calling fit, and will slow down that method."/>
+        <!-- param argument="cache_size"-->
+        <!--expand macro="class_weight"/-->
+        <expand macro="tol" default_value="0.001" help_text="Tolerance for stopping criterion. "/>
+        <expand macro="max_iter" default_value="-1" label="Solver maximum number of iterations" help_text="Hard limit on iterations within solver, or -1 for no limit."/>
+        <!--param argument="decision_function_shape"-->
+        <expand macro="random_state" help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data for probability estimation. A fixed seed allows reproducible results."/>
+    </section>
+  </xml>
+
+  <!-- Collapsed advanced section for spectral clustering: cluster count, eigen solver, seed, n_init, kernel and affinity settings, and label assignment. Fixes the user-facing typos "Nearset" and "dafault". -->
+  <xml name="spectral_clustering_advanced_options">
+    <section name="options" title="Advanced Options" expanded="False">
+        <expand macro="n_clusters"/>
+        <param argument="eigen_solver" type="select" value="" label="Eigen solver" help="The eigenvalue decomposition strategy to use.">
+            <option value="arpack" selected="true">arpack</option>
+            <option value="lobpcg">lobpcg</option>
+            <option value="amg">amg</option>
+            <!--None-->
+        </param>
+        <expand macro="random_state"/>
+        <expand macro="n_init"/>
+        <param argument="gamma" type="float" optional="true" value="1.0" label="Kernel scaling factor" help="Scaling factor of RBF, polynomial, exponential chi^2 and sigmoid affinity kernel. Ignored for affinity=''nearest_neighbors''."/>
+        <param argument="affinity" type="select" label="Affinity" help="Affinity kernel to use. ">
+            <option value="rbf" selected="true">RBF</option>
+            <option value="precomputed">precomputed</option>
+            <option value="nearest_neighbors">Nearest neighbors</option>
+        </param>
+        <param argument="n_neighbors" type="integer" optional="true" value="10" label="Number of neighbors" help="Number of neighbors to use when constructing the affinity matrix using the nearest neighbors method. Ignored for affinity=''rbf''"/>
+        <!--param argument="eigen_tol"-->
+        <param argument="assign_labels" type="select" label="Assign labels" help="The strategy to use to assign labels in the embedding space.">
+            <option value="kmeans" selected="true">kmeans</option>
+            <option value="discretize">discretize</option>
+        </param>
+        <param argument="degree" type="integer" optional="true" value="3"
+            label="Degree of the polynomial (polynomial kernel only)" help="Ignored by other kernels. default : 3 "/>
+        <param argument="coef0" type="integer" optional="true" value="1"
+            label="Zero coefficient (polynomial and sigmoid kernels only)" help="Ignored by other kernels. default : 1 "/>
+        <!--param argument="kernel_params"-->
+    </section>
+  </xml>
+
+  <!-- Collapsed advanced section for mini-batch k-means: shared clustering macros plus batch size, improvement limit, init size and reassignment ratio. Fixes the grammar slip "doe not yield" in the max_no_improvement help. -->
+  <xml name="minibatch_kmeans_advanced_options">
+    <section name="options" title="Advanced Options" expanded="False">
+        <expand macro="n_clusters"/>
+        <expand macro="init"/>
+        <expand macro="n_init" default_value="3"/>
+        <expand macro="max_iter" default_value="100"/>
+        <expand macro="tol" help_text="Early stopping heuristics based on normalized center change. To disable set to 0.0 ."/>
+        <expand macro="random_state"/>
+        <param argument="batch_size" type="integer" optional="true" value="100" label="Batch size" help="Size of the mini batches."/>
+        <!--param argument="compute_labels"-->
+        <param argument="max_no_improvement" type="integer" optional="true" value="10" label="Maximum number of improvement attempts" help="
+        Convergence detection based on inertia (the consecutive number of mini batches that does not yield an improvement on the smoothed inertia).
+        To disable, set max_no_improvement to None. "/>
+        <param argument="init_size" type="integer" optional="true" value="" label="Number of random initialization samples" help="Number of samples to randomly sample for speeding up the initialization . ( default: 3 * batch_size )"/>
+        <param argument="reassignment_ratio" type="float" optional="true" value="0.01" label="Re-assignment ratio" help="Controls the fraction of the maximum number of counts for a center to be reassigned. Higher values yield better clustering results."/>
+    </section>
+  </xml>
+
+  <!-- Advanced options section for KMeans (expanded by clustering_algorithms_options).
+       Reuses the shared n_clusters, init, n_init, max_iter, tol and random_state macros
+       and adds the copy_x switch. The boolean is emitted as the strings
+       booltrue / boolfalse. -->
+  <xml name="kmeans_advanced_options">
+    <section name="options" title="Advanced Options" expanded="False">
+      <expand macro="n_clusters"/>
+      <expand macro="init"/>
+      <expand macro="n_init"/>
+      <expand macro="max_iter"/>
+      <expand macro="tol" default_value="0.0001" help_text="Relative tolerance with regards to inertia to declare convergence."/>
+      <!--param argument="precompute_distances"/-->
+      <expand macro="random_state"/>
+      <param argument="copy_x" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Use a copy of data for precomputing distances" help="Modifying the original data introduces small numerical differences caused by subtracting and then adding the data mean."/>
+    </section>
+  </xml>
+
+  <!-- Advanced options section for Birch (expanded by clustering_algorithms_options):
+       threshold, branching_factor, and the shared n_clusters macro called with
+       default_value="3". compute_labels is present only as a commented-out placeholder. -->
+  <xml name="birch_advanced_options">
+    <section name="options" title="Advanced Options" expanded="False">
+      <param argument="threshold" type="float" optional="true" value="0.5" label="Subcluster radius threshold" help="The radius of the subcluster obtained by merging a new sample; the closest subcluster should be less than the threshold to avoid a new subcluster."/>
+      <param argument="branching_factor" type="integer" optional="true" value="50" label="Maximum number of subclusters per branch" help="Maximum number of CF subclusters in each node."/>
+      <expand macro="n_clusters" default_value="3"/>
+      <!--param argument="compute_labels"/-->
+    </section>
+  </xml>
+
+  <!-- Advanced options section for DBSCAN (expanded by clustering_algorithms_options):
+       eps, min_samples, a free-text metric name, the neighbor search algorithm
+       (auto is the default) and the tree leaf_size. -->
+  <xml name="dbscan_advanced_options">
+    <section name="options" title="Advanced Options" expanded="False">
+      <param argument="eps" type="float" optional="true" value="0.5" label="Maximum neighborhood distance" help="The maximum distance between two samples for them to be considered as in the same neighborhood."/>
+      <param argument="min_samples" type="integer" optional="true" value="5" label="Minimal core point density" help="The number of samples (or total weight) in a neighborhood for a point (including the point itself) to be considered as a core point."/>
+      <param argument="metric" type="text" optional="true" value="euclidean" label="Metric" help="The metric to use when calculating distance between instances in a feature array."/>
+      <param argument="algorithm" type="select" label="Pointwise distance computation algorithm" help="The algorithm to be used by the NearestNeighbors module to compute pointwise distances and find nearest neighbors.">
+          <option value="auto" selected="true">auto</option>
+          <option value="ball_tree">ball_tree</option>
+          <option value="kd_tree">kd_tree</option>
+          <option value="brute">brute</option>
+      </param>
+      <param argument="leaf_size" type="integer" optional="true" value="30" label="Leaf size" help="Leaf size passed to BallTree or cKDTree. Memory and time efficiency factor in tree construction and querying."/>
+    </section>
+  </xml>
+
+  <!-- Clustering algorithm chooser: a conditional keyed on selected_algorithm
+       (KMeans is the default). Each when branch expands the advanced options macro
+       of the matching algorithm; every option value has exactly one when branch. -->
+  <xml name="clustering_algorithms_options">
+    <conditional name="algorithm_options">
+      <param name="selected_algorithm" type="select" label="Clustering Algorithm">
+          <option value="KMeans" selected="true">KMeans</option>
+          <option value="SpectralClustering">Spectral Clustering</option>
+          <option value="MiniBatchKMeans">Mini Batch KMeans</option>
+          <option value="DBSCAN">DBSCAN</option>
+          <option value="Birch">Birch</option>
+      </param>
+      <when value="KMeans">
+          <expand macro="kmeans_advanced_options"/>
+      </when>
+      <when value="DBSCAN">
+          <expand macro="dbscan_advanced_options"/>
+      </when>
+      <when value="Birch">
+          <expand macro="birch_advanced_options"/>
+      </when>
+      <when value="SpectralClustering">
+          <expand macro="spectral_clustering_advanced_options"/>
+      </when>
+      <when value="MiniBatchKMeans">
+          <expand macro="minibatch_kmeans_advanced_options"/>
+      </when>
+    </conditional>
+  </xml>
+
+  <!-- Single-choice "metric" select listing six distance metrics (euclidean is the
+       default). The yield placeholder sits inside the param, so a caller can append
+       further option elements to the same select. -->
+  <xml name="distance_metrics">
+    <param argument="metric" type="select" label="Distance metric" help=" ">
+      <option value="euclidean" selected="true">euclidean</option>
+      <option value="cityblock">cityblock</option>
+      <option value="cosine">cosine</option>
+      <option value="l1">l1</option>
+      <option value="l2">l2</option>
+      <option value="manhattan">manhattan</option>
+      <yield/>
+    </param>
+  </xml>
+
+  <!-- Bare list of additional metric options with no enclosing param, so it only makes
+       sense when expanded inside a select parameter.
+       NOTE(review): presumably injected through the yield of distance_metrics for
+       dense input; confirm against the tools that expand it. -->
+  <xml name="distance_nonsparse_metrics">
+    <option value="braycurtis">braycurtis</option>
+    <option value="canberra">canberra</option>
+    <option value="chebyshev">chebyshev</option>
+    <option value="correlation">correlation</option>
+    <option value="dice">dice</option>
+    <option value="hamming">hamming</option>
+    <option value="jaccard">jaccard</option>
+    <option value="kulsinski">kulsinski</option>
+    <option value="mahalanobis">mahalanobis</option>
+    <option value="matching">matching</option>
+    <option value="minkowski">minkowski</option>
+    <option value="rogerstanimoto">rogerstanimoto</option>
+    <option value="russellrao">russellrao</option>
+    <option value="seuclidean">seuclidean</option>
+    <option value="sokalmichener">sokalmichener</option>
+    <option value="sokalsneath">sokalsneath</option>
+    <option value="sqeuclidean">sqeuclidean</option>
+    <option value="yule">yule</option>
+  </xml>
+
+  <!-- Single-choice "metric" select for pairwise kernel computations.
+       Only one option of a single-choice select may carry selected="true"; rbf, the
+       first option, is the sole default so the preselected value is unambiguous.
+       NOTE(review): scikit-learn's pairwise_kernels itself defaults to
+       metric="linear"; confirm that rbf is the intended form default. -->
+  <xml name="pairwise_kernel_metrics">
+    <param argument="metric" type="select" label="Pairwise Kernel metric" help=" ">
+      <option value="rbf" selected="true">rbf</option>
+      <option value="sigmoid">sigmoid</option>
+      <option value="polynomial">polynomial</option>
+      <option value="linear">linear</option>
+      <option value="chi2">chi2</option>
+      <option value="additive_chi2">additive_chi2</option>
+    </param>
+  </xml>
+
+  <!-- Select parameter selected_metric_function offering three pairwise functions
+       (euclidean_distances is the default). The yield placeholder inside the param
+       lets a caller append more option elements. -->
+  <xml name="sparse_pairwise_metric_functions">
+    <param name="selected_metric_function" type="select" label="Select the pairwise metric you want to compute:">
+      <option value="euclidean_distances" selected="true">Euclidean distance matrix</option>
+      <option value="pairwise_distances">Distance matrix</option>
+      <option value="pairwise_distances_argmin">Minimum distances between one point and a set of points</option>
+      <yield/>
+    </param>
+  </xml>
+
+  <!-- Bare list of kernel / distance function options with no enclosing param.
+       NOTE(review): presumably appended to sparse_pairwise_metric_functions through
+       its yield placeholder; confirm against the tools that expand it. -->
+  <xml name="pairwise_metric_functions">
+    <option value="additive_chi2_kernel" >Additive chi-squared kernel</option>
+    <option value="chi2_kernel">Exponential chi-squared kernel</option>
+    <option value="linear_kernel">Linear kernel</option>
+    <option value="manhattan_distances">L1 distances</option>
+    <option value="pairwise_kernels">Kernel</option>
+    <option value="polynomial_kernel">Polynomial kernel</option>
+    <option value="rbf_kernel">Gaussian (rbf) kernel</option>
+    <option value="laplacian_kernel">Laplacian kernel</option>
+  </xml>
+
+  <!-- when branches for the values pairwise_distances and euclidean_distances; meant to
+       be expanded inside a conditional. The pairwise_distances branch expands
+       distance_metrics and forwards this macro's own yield content into it; the
+       euclidean_distances branch offers only the "squared" switch (off by default,
+       emitted as booltrue / boolfalse). -->
+  <xml name="sparse_pairwise_condition">
+    <when value="pairwise_distances">
+      <section name="options" title="Advanced Options" expanded="False">
+          <expand macro="distance_metrics">
+              <yield/>
+          </expand>
+      </section>
+    </when>
+    <when value="euclidean_distances">
+      <section name="options" title="Advanced Options" expanded="False">
+          <param argument="squared" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false"
+            label="Return squared Euclidean distances" help=" "/>
+      </section>
+    </when>
+  </xml>
+
+  <!-- when branch for the value pairwise_distances_argmin; meant to be expanded inside a
+       conditional. Offers axis (default 1), the distance_metrics select (this macro's
+       yield content is forwarded into it) and batch_size (default 500). -->
+  <xml name="argmin_distance_condition">
+    <when value="pairwise_distances_argmin">
+      <section name="options" title="Advanced Options" expanded="False">
+          <param argument="axis" type="integer" optional="true" value="1" label="Axis" help="Axis along which the argmin and distances are to be computed."/>
+          <expand macro="distance_metrics">
+              <yield/>
+          </expand>
+          <param argument="batch_size" type="integer" optional="true" value="500" label="Batch size" help="Number of rows to be processed in each batch run."/>
+      </section>
+    </when>
+  </xml>
+
+  <!-- Select parameter selected_pre_processor listing five preprocessors
+       (StandardScaler is the default). The yield placeholder inside the param lets a
+       caller append more option elements. The option values match the when branches
+       declared in sparse_preprocessor_options. -->
+  <xml name="sparse_preprocessors">
+    <param name="selected_pre_processor" type="select" label="Select a preprocessor:">
+      <option value="StandardScaler" selected="true">Standard Scaler (Standardizes features by removing the mean and scaling to unit variance)</option>
+      <option value="Binarizer">Binarizer (Binarizes data)</option>
+      <option value="Imputer">Imputer (Completes missing values)</option>
+      <option value="MaxAbsScaler">Max Abs Scaler (Scales features by their maximum absolute value)</option>
+      <option value="Normalizer">Normalizer (Normalizes samples individually to unit norm)</option>
+      <yield/>
+    </param>
+  </xml>
+
+  <!-- One when branch per preprocessor offered by sparse_preprocessors; meant to be
+       expanded inside a conditional, with further branches appended through the
+       trailing yield. Every branch expands multitype_input for the dataset and then
+       declares an "Advanced Options" section. Boolean switches are emitted as the
+       strings booltrue / boolfalse, except the Imputer axis switch which emits 1 / 0
+       (unchecked by default, i.e. 0).
+       NOTE(review): the Normalizer branch preselects norm l1 while scikit-learn's
+       Normalizer defaults to l2; confirm that this default is intended. -->
+  <xml name="sparse_preprocessor_options">
+    <when value="Binarizer">
+        <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supported."/>
+        <section name="options" title="Advanced Options" expanded="False">
+            <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
+                label="Use a copy of data for precomputing binarization" help=" "/>
+            <param argument="threshold" type="float" optional="true" value="0.0"
+                label="Threshold"
+                help="Feature values below or equal to this are replaced by 0, above it by 1. Threshold may not be less than 0 for operations on sparse matrices. "/>
+        </section>
+    </when>
+    <when value="Imputer">
+      <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supported."/>
+      <section name="options" title="Advanced Options" expanded="False">
+          <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
+            label="Use a copy of data for precomputing imputation" help=" "/>
+          <param argument="strategy" type="select" optional="true" label="Imputation strategy" help=" ">
+              <option value="mean" selected="true">Replace missing values using the mean along the axis</option>
+              <option value="median">Replace missing values using the median along the axis</option>
+              <option value="most_frequent">Replace missing using the most frequent value along the axis</option>
+          </param>
+          <param argument="missing_values" type="text" optional="true" value="NaN"
+                label="Placeholder for missing values" help="For missing values encoded as numpy.nan, use the string value “NaN”"/>
+          <param argument="axis" type="boolean" optional="true" truevalue="1" falsevalue="0"
+                label="Impute along axis = 1" help="If false, axis = 0 is selected for imputation. "/>
+          <!--param argument="axis" type="select" optional="true" label="The axis along which to impute" help=" ">
+              <option value="0" selected="true">Impute along columns</option>
+              <option value="1">Impute along rows</option>
+          </param-->
+      </section>
+    </when>
+    <when value="StandardScaler">
+      <expand macro="multitype_input"/>
+      <section name="options" title="Advanced Options" expanded="False">
+        <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
+            label="Use a copy of data for performing inplace scaling" help=" "/>
+        <param argument="with_mean" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
+            label="Center the data before scaling" help=" "/>
+        <param argument="with_std" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
+            label="Scale the data to unit variance (or unit standard deviation)" help=" "/>
+      </section>
+    </when>
+    <when value="MaxAbsScaler">
+      <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supported."/>
+      <section name="options" title="Advanced Options" expanded="False">
+        <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
+            label="Use a copy of data for precomputing scaling" help=" "/>
+      </section>
+    </when>
+    <when value="Normalizer">
+      <expand macro="multitype_input" format="tabular,txt" help="Tabular and sparse datasets are supported."/>
+      <section name="options" title="Advanced Options" expanded="False">
+        <param argument="norm" type="select" optional="true" label="The norm to use to normalize non zero samples" help=" ">
+          <option value="l1" selected="true">l1</option>
+          <option value="l2">l2</option>
+          <option value="max">max</option>
+        </param>
+        <param argument="copy" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true"
+            label="Use a copy of data for precomputing row normalization" help=" "/>
+      </section>
+    </when>
+    <yield/>
+  </xml>
+  <!-- Select parameter score_func listing five scoring functions. No option is marked
+       selected, so no explicit default is declared here. -->
+  <xml name="feature_selection_score_function">
+    <param argument="score_func" type="select" label="Select a score function">
+      <option value="chi2">chi2 - Compute chi-squared stats between each non-negative feature and class</option>
+      <option value="f_classif">f_classif - Compute the ANOVA F-value for the provided sample</option>
+      <option value="f_regression">f_regression - Univariate linear regression tests</option>
+      <option value="mutual_info_classif">mutual_info_classif - Estimate mutual information for a discrete target variable</option>
+      <option value="mutual_info_regression">mutual_info_regression - Estimate mutual information for a continuous target variable</option>
+    </param>
+  </xml>
+  <!-- Select parameter estimator listing five predefined estimators. Each option value
+       is the same text as its label and is written as a constructor call, with quotes
+       escaped as &quot; for the attribute.
+       NOTE(review): how the consuming tool turns these strings into estimator objects
+       is not visible in this file; confirm before changing any value. -->
+  <xml name="feature_selection_estimator">
+    <param argument="estimator" type="select" label="Select an estimator" help="The base estimator from which the transformer is built.">
+      <option value="svm.SVR(kernel=&quot;linear&quot;)">svm.SVR(kernel=&quot;linear&quot;)</option>
+      <option value="svm.SVC(kernel=&quot;linear&quot;)">svm.SVC(kernel=&quot;linear&quot;)</option>
+      <option value="svm.LinearSVC(penalty=&quot;l1&quot;, dual=False, tol=1e-3)">svm.LinearSVC(penalty=&quot;l1&quot;, dual=False, tol=1e-3)</option>
+      <option value="linear_model.LassoCV()">linear_model.LassoCV()</option>
+      <option value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)">ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)</option>
+    </param>
+  </xml>
+  <!-- Select parameter has_estimator asking whether the wanted estimator is among the
+       predefined ones. The option values yes / no are matched by the when branches of
+       feature_selection_estimator_choices; more options can be appended through the
+       yield placeholder. -->
+  <xml name="feature_selection_extra_estimator">
+      <param name="has_estimator" type="select" label="Is your estimator on the list above?">
+        <option value="yes">Yes, my estimator is on the list</option>
+        <option value="no">No, I need to make a new estimator</option>
+        <yield/>
+      </param>
+  </xml>
+  <!-- when branches for the yes / no values of has_estimator; meant to be expanded inside
+       a conditional, with further branches appended through the trailing yield. The yes
+       branch adds nothing; the no branch adds the free-text parameter new_estimator.
+       NOTE(review): new_estimator is unrestricted user text. If the consuming tool
+       evaluates it as code (not visible in this file), it must be validated or
+       sanitized there; confirm. -->
+  <xml name="feature_selection_estimator_choices">
+    <when value="yes">
+    </when>
+    <when value="no">
+      <param name="new_estimator" type="text" value="" label="Make a new estimator" />
+    </when>
+    <yield/>
+  </xml>
+  <!-- Conditional select_methods choosing the operation to run: fit_transform (no extra
+       parameters, only a placeholder comment) or get_support, which adds the boolean
+       "indices" switch (off by default, emitted as booltrue / boolfalse). -->
+  <xml name="feature_selection_methods">
+    <conditional name="select_methods">
+      <param name="selected_method" type="select" label="Select an operation">
+          <option value="fit_transform">fit_transform - Fit to data, then transform it</option>
+          <option value="get_support">get_support - Get a mask, or integer index, of the features selected</option>
+      </param>
+      <when value="fit_transform">
+        <!--**fit_params-->
+      </when>
+      <when value="get_support">
+        <param name="indices" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Indices" help="If True, the return value will be an array of integers, rather than a boolean mask."/>
+      </when>
+    </conditional>
+  </xml>
+
+  <!-- Outputs -->
+
+  <!-- Output datasets, each enabled by a filter on selected_tasks['selected_task']:
+       outfile_predict (tabular) when the task is 'load', outfile_fit (zip) when the
+       task is 'train'. -->
+  <xml name="output">
+    <outputs>
+      <data format="tabular" name="outfile_predict">
+          <filter>selected_tasks['selected_task'] == 'load'</filter>
+      </data>
+      <data format="zip" name="outfile_fit">
+          <filter>selected_tasks['selected_task'] == 'train'</filter>
+      </data>
+    </outputs>
+  </xml>
+
+
+  <!--Citations-->
+  <!-- Citations block holding a single DOI citation (a Zenodo record). -->
+  <xml name="eden_citation">
+    <citations>
+        <citation type="doi">10.5281/zenodo.15094</citation>
+    </citations>
+  </xml>
+
+  <!-- Citations block holding the scikit-learn JMLR article as a BibTeX entry.
+       BibTeX fields must be comma separated, including the year field that
+       precedes url. -->
+  <xml name="sklearn_citation">
+    <citations>
+        <citation type="bibtex">
+            @article{scikit-learn,
+             title={Scikit-learn: Machine Learning in {P}ython},
+             author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
+                     and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
+                     and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
+                     Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
+             journal={Journal of Machine Learning Research},
+             volume={12},
+             pages={2825--2830},
+             year={2011},
+             url = {https://github.com/scikit-learn/scikit-learn}
+            }
+        </citation>
+    </citations>
+  </xml>
+
+  <!-- Citations block holding a BibTeX Misc entry for SciPy.
+       NOTE(review): the entry has an empty citation key; confirm that the BibTeX
+       parser used by the target Galaxy version accepts it. -->
+  <xml name="scipy_citation">
+    <citations>
+        <citation type="bibtex">
+          @Misc{,
+          author =    {Eric Jones and Travis Oliphant and Pearu Peterson and others},
+          title =     {{SciPy}: Open source scientific tools for {Python}},
+          year =      {2001--},
+          url = "http://www.scipy.org/",
+          note = {[Online; accessed 2016-04-09]}
+        }
+        </citation>
+    </citations>
+  </xml>
+
+</macros>
author	bgruening
date	Thu, 12 Apr 2018 08:23:30 -0400
parents
children	58322d3c7bd3