Mercurial > repos > bgruening > sklearn_feature_selection

diff main_macros.xml @ 17:2bbbac61e48d draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
author: bgruening
date: Sun, 30 Dec 2018 01:57:11 -0500
parents: 026667802750
children: ec25331946b8
--- a/main_macros.xml	Thu Oct 11 03:34:39 2018 -0400
+++ b/main_macros.xml	Sun Dec 30 01:57:11 2018 -0500
@@ -1,13 +1,13 @@
 <macros>
-  <token name="@VERSION@">0.9</token>
+  <token name="@VERSION@">1.0</token>
 
   <xml name="python_requirements">
       <requirements>
           <requirement type="package" version="3.6">python</requirement>
-          <requirement type="package" version="0.19.1">scikit-learn</requirement>
-          <requirement type="package" version="0.22.0">pandas</requirement>
-          <requirement type="package" version="0.72.1">xgboost</requirement>
-          <requirement type="package" version="0.9.12">asteval</requirement>
+          <requirement type="package" version="0.20.2">scikit-learn</requirement>
+          <requirement type="package" version="0.23.4">pandas</requirement>
+          <requirement type="package" version="0.80">xgboost</requirement>
+          <requirement type="package" version="0.9.13">asteval</requirement>
           <yield />
       </requirements>
   </xml>
@@ -244,7 +244,7 @@
     <param argument="shuffle" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="@LABEL@" help="@HELP_TEXT@"/>
   </xml>
 
-  <xml name="random_state" token_default_value="" token_help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data. A fixed seed allows reproducible results.">
+  <xml name="random_state" token_default_value="" token_help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data. A fixed seed allows reproducible results. default=None.">
     <param argument="random_state" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Random seed number" help="@HELP_TEXT@"/>
   </xml>
 
@@ -346,20 +346,20 @@
   <xml name="samples_column_selector_options" token_column_option="selected_column_selector_option" token_col_name="col1" token_multiple="False" token_infile="infile1">
     <param name="@COLUMN_OPTION@" type="select" label="Choose how to select data by column:">
       <option value="by_index_number" selected="true">Select columns by column index number(s)</option>
+      <option value="all_but_by_index_number">All columns BUT by column index number(s)</option>
       <option value="by_header_name">Select columns by column header name(s)</option>
-      <option value="all_but_by_index_number">All columns but by column index number(s)</option>
-      <option value="all_but_by_header_name">All columns but by column header name(s)</option>
+      <option value="all_but_by_header_name">All columns BUT by column header name(s)</option>
       <option value="all_columns">All columns</option>
     </param>
     <when value="by_index_number">
       <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/>
     </when>
+    <when value="all_but_by_index_number">
+      <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/>
+    </when>
     <when value="by_header_name">
       <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. For example: target1,target2"/>
     </when>
-    <when value="all_but_by_index_number">
-      <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/>
-    </when>
     <when value="all_but_by_header_name">
       <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. For example: target1,target2"/>
     </when>
@@ -543,9 +543,18 @@
       <!--param argument="precompute_distances"/-->
       <expand macro="random_state"/>
       <param argument="copy_x" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Use a copy of data for precomputing distances" help="Mofifying the original data introduces small numerical differences caused by subtracting and then adding the data mean."/>
+      <expand macro="kmeans_algorithm"/>
     </section>
   </xml>
 
+  <xml name="kmeans_algorithm"><!-- Select for the `algorithm` argument: auto (pre-selected), full or elkan. -->
+    <param argument="algorithm" type="select" label="K-means algorithm to use:">
+      <option value="auto" selected="true">auto</option>
+      <option value="full">full</option>
+      <option value="elkan">elkan</option>
+    </param>
+  </xml>
+
   <xml name="birch_advanced_options">
     <section name="options" title="Advanced Options" expanded="False">
       <param argument="threshold" type="float" optional="true" value="0.5" label="Subcluster radius threshold" help="The radius of the subcluster obtained by merging a new sample; the closest subcluster should be less than the threshold to avoid a new subcluster."/>
@@ -730,8 +739,8 @@
           </param>
           <param argument="missing_values" type="text" optional="true" value="NaN"
                 label="Placeholder for missing values" help="For missing values encoded as numpy.nan, use the string value “NaN”"/>
-          <param argument="axis" type="boolean" optional="true" truevalue="1" falsevalue="0"
-                label="Impute along axis = 1" help="If fasle, axis = 0 is selected for imputation. "/>
+          <!--param argument="axis" type="boolean" optional="true" truevalue="1" falsevalue="0"
+                label="Impute along axis = 1" help="If false, axis = 0 is selected for imputation. "/> -->
           <!--param argument="axis" type="select" optional="true" label="The axis along which to impute" help=" ">
               <option value="0" selected="true">Impute along columns</option>
               <option value="1">Impute along rows</option>
@@ -802,136 +811,285 @@
     </expand>
   </xml>
 
-  <xml name="fs_selectfrommodel_prefitted">
-    <param name="input_mode" type="select" label="Construct a new estimator from a selection list?" >
-      <option value="new" selected="true">Yes</option>
-      <option value="prefitted">No. Load a prefitted estimator</option>
-    </param>
-    <when value="new">
-      <expand macro="estimator_selector_all"/>
-    </when>
-    <when value="prefitted">
-      <param name="fitted_estimator" type="data" format='zip' label="Load a prefitted estimator" />
-    </when>
+  <xml name="cv_splitter"><!-- Option list of cross-validation splitters for a select param; "default splitter" is pre-selected and callers may append further options through the yield. -->
+    <option value="default" selected="true">default splitter</option>
+    <option value="KFold">KFold</option>
+    <option value="StratifiedKFold">StratifiedKFold</option>
+    <option value="LeaveOneOut">LeaveOneOut</option>
+    <option value="LeavePOut">LeavePOut</option>
+    <option value="RepeatedKFold">RepeatedKFold</option>
+    <option value="RepeatedStratifiedKFold">RepeatedStratifiedKFold</option>
+    <option value="ShuffleSplit">ShuffleSplit</option>
+    <option value="StratifiedShuffleSplit">StratifiedShuffleSplit</option>
+    <option value="TimeSeriesSplit">TimeSeriesSplit</option>
+    <option value="PredefinedSplit">PredefinedSplit</option>
+    <yield/>
   </xml>
 
-  <xml name="fs_selectfrommodel_no_prefitted">
-    <param name="input_mode" type="select" label="Construct a new estimator from a selection list?" >
-      <option value="new" selected="true">Yes</option>
-    </param>
-    <when value="new">
-      <expand macro="estimator_selector_all"/>
+  <xml name="cv_splitter_options"><!-- One `when` branch per option of the cv_splitter list, holding that splitter's parameters; callers may append further branches through the yield. -->
+    <when value="default">
+      <expand macro="cv_n_splits"/>
+    </when>
+    <when value="KFold">
+      <expand macro="cv_n_splits"/>
+      <expand macro="cv_shuffle"/>
+      <expand macro="random_state"/>
+    </when>
+    <when value="StratifiedKFold">
+      <expand macro="cv_n_splits"/>
+      <expand macro="cv_shuffle"/>
+      <expand macro="random_state"/>
+    </when>
+    <when value="LeaveOneOut">
+    </when>
+    <when value="LeavePOut">
+      <param argument="p" type="integer" value="" label="p" help="Integer. Size of the test sets."/>
+    </when>
+    <when value="RepeatedKFold">
+      <expand macro="cv_n_splits" value="5"/>
+      <param argument="n_repeats" type="integer" value="10" label="n_repeats" help="Number of times cross-validator needs to be repeated." />
+      <expand macro="random_state" />
     </when>
+    <when value="RepeatedStratifiedKFold">
+      <expand macro="cv_n_splits" value="5"/>
+      <param argument="n_repeats" type="integer" value="10" label="n_repeats" help="Number of times cross-validator needs to be repeated." />
+      <expand macro="random_state" />
+    </when>
+    <when value="ShuffleSplit">
+      <expand macro="cv_n_splits" value="10" help="Number of re-shuffling and splitting iterations."/>
+      <expand macro="cv_test_size" value="0.1" />
+      <expand macro="random_state"/>
+    </when>
+    <when value="StratifiedShuffleSplit">
+      <expand macro="cv_n_splits" value="10" help="Number of re-shuffling and splitting iterations."/>
+      <expand macro="cv_test_size" value="0.1" />
+      <expand macro="random_state"/>
+    </when>
+    <when value="TimeSeriesSplit">
+      <expand macro="cv_n_splits"/>
+      <param argument="max_train_size" type="integer" value="" optional="true" label="Maximum size of the training set" help="Maximum size for a single training set." />
+    </when>
+    <when value="PredefinedSplit">
+      <param argument="test_fold" type="text" value="" area="true" label="test_fold" help="List, e.g., [0, 1, -1, 1], represents two test sets, [X[0]] and [X[1], X[3]], X[2] is excluded from any test set due to '-1'."/>
+    </when>
+    <yield/>
   </xml>
 
   <xml name="cv">
-    <param argument="cv" type="text" value="" optional="true" label="cv" help="Optional. Integer or evalable splitter object, e.g., StratifiedKFold(n_splits=3, shuffle=True, random_state=10). Leave blank for default." >
-      <sanitizer>
-        <valid initial="default">
-          <add value="&apos;"/>
-        </valid>
-      </sanitizer>
-    </param>
+    <conditional name="cv_selector">
+      <param name="selected_cv" type="select" label="Select the cv splitter:">
+        <expand macro="cv_splitter">
+          <option value="GroupKFold">GroupKFold</option>
+          <option value="GroupShuffleSplit">GroupShuffleSplit</option>
+          <option value="LeaveOneGroupOut">LeaveOneGroupOut</option>
+          <option value="LeavePGroupsOut">LeavePGroupsOut</option>
+        </expand>
+      </param>
+      <expand macro="cv_splitter_options">
+        <when value="GroupKFold">
+          <expand macro="cv_n_splits"/>
+          <expand macro="cv_groups" />
+        </when>
+        <when value="GroupShuffleSplit">
+          <expand macro="cv_n_splits" value="5"/>
+          <expand macro="cv_test_size"/>
+          <expand macro="random_state"/>
+          <expand macro="cv_groups"/>
+        </when>
+        <when value="LeaveOneGroupOut">
+          <expand macro="cv_groups"/>
+        </when>
+        <when value="LeavePGroupsOut">
+          <param argument="n_groups" type="integer" value="" label="n_groups" help="Number of groups (p) to leave out in the test split." />
+          <expand macro="cv_groups"/>
+        </when>
+      </expand>
+    </conditional>
+  </xml>
+
+  <xml name="cv_reduced"><!-- Conditional `cv_selector` built from cv_splitter and cv_splitter_options with nothing passed to their yields, so only the base splitter list is offered. -->
+    <conditional name="cv_selector">
+      <param name="selected_cv" type="select" label="Select the cv splitter:">
+        <expand macro="cv_splitter"/>
+      </param>
+      <expand macro="cv_splitter_options"/>
+    </conditional>
+  </xml>
+
+  <xml name="cv_n_splits" token_value="3" token_help="Number of folds. Must be at least 2."><!-- Integer `n_splits` param (min 2); its value and help text come from the VALUE and HELP tokens, whose defaults are set on this tag. -->
+    <param argument="n_splits" type="integer" value="@VALUE@" min="2" label="n_splits" help="@HELP@"/>
+  </xml>
+
+  <xml name="cv_shuffle"><!-- Optional boolean `shuffle` param, unchecked by default; yields booltrue/boolfalse. -->
+    <param argument="shuffle" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Whether to shuffle data before splitting" />
+  </xml>
+
+  <xml name="cv_test_size" token_value="0.2"><!-- Float `test_size` param (min 0.0); the initial value comes from the VALUE token, which defaults to 0.2. -->
+    <param argument="test_size" type="float" value="@VALUE@" min="0.0" label="Portion or number of the test set" help="0.0-1.0, proportion of the dataset to include in the test split; >1, integer only, the absolute number of test samples "/>
+  </xml>
+
+  <xml name="cv_groups"><!-- Free-text area for the `groups` argument; the help text asks for the group labels written as a list. -->
+    <param argument="groups" type="text" value="" area="true" label="Groups" help="Group labels in a list. e.g., [1, 1, 2, 2, 3, 3, 3]"/>
+  </xml>
+
+  <xml name="feature_selection_algorithms"><!-- Option list of the feature selection algorithms offered by the algorithm selector; SelectKBest is pre-selected. -->
+    <option value="SelectKBest" selected="true">SelectKBest - Select features according to the k highest scores</option>
+    <option value="GenericUnivariateSelect">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option>
+    <option value="SelectPercentile">SelectPercentile - Select features according to a percentile of the highest scores</option>
+    <option value="SelectFpr">SelectFpr - Filter: Select the p-values below alpha based on a FPR test</option>
+    <option value="SelectFdr">SelectFdr - Filter: Select the p-values for an estimated false discovery rate</option>
+    <option value="SelectFwe">SelectFwe - Filter: Select the p-values corresponding to Family-wise error rate</option>
+    <option value="VarianceThreshold">VarianceThreshold - Feature selector that removes all low-variance features</option>
+    <option value="SelectFromModel">SelectFromModel - Meta-transformer for selecting features based on importance weights</option>
+    <option value="RFE">RFE - Feature ranking with recursive feature elimination</option>
+    <option value="RFECV">RFECV - Feature ranking with recursive feature elimination and cross-validated selection of the best number of features</option>
   </xml>
 
-  <xml name="feature_selection_all">
+  <xml name="feature_selection_algorithm_details"><!-- `when` branches for the score-function based selectors and VarianceThreshold; the SelectFromModel, RFE and RFECV branches live in their own macros. -->
+    <when value="GenericUnivariateSelect">
+      <expand macro="feature_selection_score_function" />
+      <section name="options" title="Advanced Options" expanded="False">
+        <param argument="mode" type="select" label="Feature selection mode">
+          <option value="percentile">percentile</option>
+          <option value="k_best">k_best</option>
+          <option value="fpr">fpr</option>
+          <option value="fdr">fdr</option>
+          <option value="fwe">fwe</option>
+        </param>
+        <param argument="param" type="float" value="" optional="true" label="Parameter of the corresponding mode" help="float or int depending on the feature selection mode" />
+      </section>
+    </when>
+    <when value="SelectPercentile">
+      <expand macro="feature_selection_score_function" />
+      <section name="options" title="Advanced Options" expanded="False">
+        <param argument="percentile" type="integer" value="10" optional="True" label="Percent of features to keep" />
+      </section>
+    </when>
+    <when value="SelectKBest">
+      <expand macro="feature_selection_score_function" />
+      <section name="options" title="Advanced Options" expanded="False">
+        <param argument="k" type="integer" value="10" optional="True" label="Number of top features to select" help="No 'all' option is supported." />
+      </section>
+    </when>
+    <when value="SelectFpr">
+      <expand macro="feature_selection_score_function" />
+      <section name="options" title="Advanced Options" expanded="False">
+        <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest p-value for features to be kept."/>
+      </section>
+    </when>
+    <when value="SelectFdr">
+      <expand macro="feature_selection_score_function" />
+      <section name="options" title="Advanced Options" expanded="False">
+        <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/>
+      </section>
+    </when>
+    <when value="SelectFwe">
+      <expand macro="feature_selection_score_function" />
+      <section name="options" title="Advanced Options" expanded="False">
+        <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/>
+      </section>
+    </when>
+    <when value="VarianceThreshold">
+      <section name="options" title="Options" expanded="False">
+        <param argument="threshold" type="float" value="" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/>
+      </section>
+    </when>
+  </xml>
+
+  <xml name="feature_selection_SelectFromModel"><!-- SelectFromModel branch: the estimator is either built via estimator_selector_fs or loaded prefitted from a zip dataset, followed by the shared advanced options. -->
+    <when value="SelectFromModel">
+      <conditional name="model_inputter">
+        <param name="input_mode" type="select" label="Construct a new estimator from a selection list?" >
+          <option value="new" selected="true">Yes</option>
+          <option value="prefitted">No. Load a prefitted estimator</option>
+        </param>
+        <when value="new">
+          <expand macro="estimator_selector_fs"/>
+        </when>
+        <when value="prefitted">
+          <param name="fitted_estimator" type="data" format='zip' label="Load a prefitted estimator" />
+        </when>
+      </conditional>
+      <expand macro="feature_selection_SelectFromModel_options"/>
+    </when>
+  </xml>
+
+  <xml name="feature_selection_SelectFromModel_no_prefitted"><!-- SelectFromModel branch without the prefitted choice: the only input mode is "new", built via estimator_selector_all. -->
+    <when value="SelectFromModel">
+      <conditional name="model_inputter">
+        <param name="input_mode" type="select" label="Construct a new estimator from a selection list?" >
+          <option value="new" selected="true">Yes</option>
+        </param>
+        <when value="new">
+          <expand macro="estimator_selector_all"/>
+        </when>
+      </conditional>
+      <expand macro="feature_selection_SelectFromModel_options"/>
+    </when>
+  </xml>
+
+  <xml name="feature_selection_SelectFromModel_options"><!-- Advanced options section expanded by both SelectFromModel branches: threshold, norm_order and max_features. -->
+    <section name="options" title="Advanced Options" expanded="False">
+      <param argument="threshold" type="text" value="" optional="true" label="threshold" help="The threshold value to use for feature selection. e.g. 'mean', 'median', '1.25*mean'." />
+      <param argument="norm_order" type="integer" value="1" label="norm_order" help="Order of the norm used to filter the vectors of coefficients below threshold in the case where the coef_ attribute of the estimator is of dimension 2. " />
+      <param argument="max_features" type="integer" value="" optional="true" label="The maximum number of features selected scoring above threshold" help="To disable threshold and only select based on max_features, set threshold=-np.inf."/>
+    </section>
+  </xml>
+
+  <xml name="feature_selection_RFE"><!-- RFE branch; the caller supplies the estimator selector through the yield, which is placed ahead of the advanced options. -->
+    <when value="RFE">
+      <yield/>
+      <section name="options" title="Advanced Options" expanded="False">
+        <param argument="n_features_to_select" type="integer" value="" optional="true" label="n_features_to_select" help="The number of features to select. If None, half of the features are selected." />
+        <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
+        <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
+      </section>
+    </when>
+  </xml>
+
+  <xml name="feature_selection_RFECV"><!-- RFECV branch; the caller supplies the estimator selector through the yield, and the advanced options add the cv_reduced splitter selector and scoring_selection. -->
+    <when value="RFECV">
+      <yield/>
+      <section name="options" title="Advanced Options" expanded="False">
+        <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
+        <param argument="min_features_to_select" type="integer" value="1" optional="true" label="The minimum number of features to be selected"/>
+        <expand macro="cv_reduced"/>
+        <expand macro="scoring_selection"/>
+        <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
+      </section>
+    </when>
+  </xml>
+
+  <xml name="feature_selection_pipeline">
+    <!-- Compared to `feature_selection_fs`: no prefitted estimator for SelectFromModel, and no user-supplied ("customer") estimator for RFE and RFECV. -->
     <conditional name="fs_algorithm_selector">
       <param name="selected_algorithm" type="select" label="Select a feature selection algorithm">
-        <option value="SelectKBest" selected="true">SelectKBest - Select features according to the k highest scores</option>
-        <option value="SelectFromModel">SelectFromModel - Meta-transformer for selecting features based on importance weights</option>
-        <option value="GenericUnivariateSelect">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option>
-        <option value="SelectPercentile">SelectPercentile - Select features according to a percentile of the highest scores</option>
-        <option value="SelectFpr">SelectFpr - Filter: Select the p-values below alpha based on a FPR test</option>
-        <option value="SelectFdr">SelectFdr - Filter: Select the p-values for an estimated false discovery rate</option>
-        <option value="SelectFwe">SelectFwe - Filter: Select the p-values corresponding to Family-wise error rate</option>
-        <option value="RFE">RFE - Feature ranking with recursive feature elimination</option>
-        <option value="RFECV">RFECV - Feature ranking with recursive feature elimination and cross-validated selection of the best number of features</option>
-        <option value="VarianceThreshold">VarianceThreshold - Feature selector that removes all low-variance features</option>
+        <expand macro="feature_selection_algorithms"/>
       </param>
-      <when value="SelectFromModel">
-        <conditional name="model_inputter">
-          <yield/>
-        </conditional>
-        <section name="options" title="Advanced Options" expanded="False">
-          <param argument="threshold" type="text" value="" optional="true" label="threshold" help="The threshold value to use for feature selection. e.g. 'mean', 'median', '1.25*mean'." />
-          <param argument="norm_order" type="integer" value="1" label="norm_order" help="Order of the norm used to filter the vectors of coefficients below threshold in the case where the coef_ attribute of the estimator is of dimension 2. " />
-        </section>
-      </when>
-      <when value="GenericUnivariateSelect">
-        <expand macro="feature_selection_score_function" />
-        <section name="options" title="Advanced Options" expanded="False">
-          <param argument="mode" type="select" label="Feature selection mode">
-            <option value="percentile">percentile</option>
-            <option value="k_best">k_best</option>
-            <option value="fpr">fpr</option>
-            <option value="fdr">fdr</option>
-            <option value="fwe">fwe</option>
-          </param>
-          <param argument="param" type="float" value="" optional="true" label="Parameter of the corresponding mode" help="float or int depending on the feature selection mode" />
-        </section>
-      </when>
-      <when value="SelectPercentile">
-        <expand macro="feature_selection_score_function" />
-        <section name="options" title="Advanced Options" expanded="False">
-          <param argument="percentile" type="integer" value="10" optional="True" label="Percent of features to keep" />
-        </section>
-      </when>
-      <when value="SelectKBest">
-        <expand macro="feature_selection_score_function" />
-        <section name="options" title="Advanced Options" expanded="False">
-          <param argument="k" type="integer" value="10" optional="True" label="Number of top features to select" help="No 'all' option is supported." />
-        </section>
-      </when>
-      <when value="SelectFpr">
-        <expand macro="feature_selection_score_function" />
-        <section name="options" title="Advanced Options" expanded="False">
-          <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest p-value for features to be kept."/>
-        </section>
-      </when>
-      <when value="SelectFdr">
-        <expand macro="feature_selection_score_function" />
-        <section name="options" title="Advanced Options" expanded="False">
-          <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/>
-        </section>
-      </when>
-      <when value="SelectFwe">
-        <expand macro="feature_selection_score_function" />
-        <section name="options" title="Advanced Options" expanded="False">
-          <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/>
-        </section>
-      </when>
-      <when value="RFE">
+      <expand macro="feature_selection_algorithm_details"/>
+      <expand macro="feature_selection_SelectFromModel_no_prefitted"/>
+      <expand macro="feature_selection_RFE">
+        <expand macro="estimator_selector_all"/>
+      </expand>  
+      <expand macro="feature_selection_RFECV">
         <expand macro="estimator_selector_all"/>
-        <section name="options" title="Advanced Options" expanded="False">
-          <param argument="n_features_to_select" type="integer" value="" optional="true" label="n_features_to_select" help="The number of features to select. If None, half of the features are selected." />
-          <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
-          <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
-        </section>
-      </when>
-      <when value="RFECV">
-        <expand macro="estimator_selector_all"/>
-        <section name="options" title="Advanced Options" expanded="False">
-          <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
-          <expand macro="cv"/>
-          <expand macro="scoring_selection"/>
-          <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
-        </section>
-      </when>
-      <when value="VarianceThreshold">
-        <section name="options" title="Options" expanded="False">
-          <param argument="threshold" type="float" value="" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/>
-        </section>
-      </when>
-      <!--when value="chi2">
-      </when>
-      <when value="f_classif">
-      </when>
-      <when value="f_regression">
-      </when>
-      <when value="mutual_info_classif">
-      </when>
-      <when value="mutual_info_regression">
-      </when-->
+      </expand>
+    </conditional>
+  </xml>
+
+  <xml name="feature_selection_fs"><!-- Feature selection conditional whose SelectFromModel branch offers a prefitted estimator and whose RFE/RFECV branches take their estimator from estimator_selector_fs. -->
+    <conditional name="fs_algorithm_selector">
+      <param name="selected_algorithm" type="select" label="Select a feature selection algorithm">
+        <expand macro="feature_selection_algorithms"/>
+      </param>
+      <expand macro="feature_selection_algorithm_details"/>
+      <expand macro="feature_selection_SelectFromModel"/>
+      <expand macro="feature_selection_RFE">
+        <expand macro="estimator_selector_fs"/>
+      </expand>  
+      <expand macro="feature_selection_RFECV">
+        <expand macro="estimator_selector_fs"/>
+      </expand>
     </conditional>
   </xml>
 
@@ -945,21 +1103,6 @@
     </param>
   </xml>
 
-  <xml name="feature_selection_output_mothods">
-    <conditional name="output_method_selector">
-      <param name="selected_method" type="select" label="Select an output method:">
-          <option value="fit_transform">fit_transform - Fit to data, then transform it</option>
-          <option value="get_support">get_support - Get a mask, or integer index, of the features selected</option>
-      </param>
-      <when value="fit_transform">
-        <!--**fit_params-->
-      </when>
-      <when value="get_support">
-        <param name="indices" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Indices" help="If True, the return value will be an array of integers, rather than a boolean mask."/>
-      </when>
-    </conditional>
-  </xml>
-
   <xml name="model_validation_common_options">
     <expand macro="cv"/>
     <expand macro="verbose"/>
@@ -1122,22 +1265,22 @@
       <expand macro="pre_dispatch" value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/>
       <param argument="iid" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="iid" help="If True, data is identically distributed across the folds"/>
       <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset."/>
-      <param argument="error_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Raise fit error:" help="If false, the metric score is assigned to 0 if an error occurs in estimator fitting and FitFailedWarning is raised."/>
+      <param argument="error_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Raise fit error:" help="If false, the metric score is assigned to NaN if an error occurs in estimator fitting and FitFailedWarning is raised."/>
       <param argument="return_train_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="return_train_score" help=""/>
   </xml>
 
-  <xml name="estimator_selector_all">
-    <conditional name="estimator_selector">
-      <param name="selected_module" type="select" label="Choose the module that contains target estimator:" >
-        <option value="svm" selected="true">sklearn.svm</option>
-        <option value="linear_model">sklearn.linear_model</option>
-        <option value="ensemble">sklearn.ensemble</option>
-        <option value="naive_bayes">sklearn.naive_bayes</option>
-        <option value="tree">sklearn.tree</option>
-        <option value="neighbors">sklearn.neighbors</option>
-        <option value="xgboost">xgboost</option>
-        <!--more-->
-      </param>
+  <xml name="estimator_module_options">
+      <option value="svm" selected="true">sklearn.svm</option>
+      <option value="linear_model">sklearn.linear_model</option>
+      <option value="ensemble">sklearn.ensemble</option>
+      <option value="naive_bayes">sklearn.naive_bayes</option>
+      <option value="tree">sklearn.tree</option>
+      <option value="neighbors">sklearn.neighbors</option>
+      <option value="xgboost">xgboost</option>
+      <yield/>
+  </xml>
+
+  <xml name="estimator_suboptions">
       <when value="svm">
         <param name="selected_estimator" type="select" label="Choose estimator class:">
           <option value="LinearSVC" selected="true">LinearSVC</option>
@@ -1244,6 +1387,30 @@
         </param>
         <expand macro="estimator_params_text"/>
       </when>
+      <yield/>
+  </xml>
+
+  <xml name="estimator_selector_all"><!-- Conditional `estimator_selector` using the stock module options and sub-options, with nothing added through their yields. -->
+    <conditional name="estimator_selector">
+      <param name="selected_module" type="select" label="Choose the module that contains target estimator:" >
+        <expand macro="estimator_module_options"/>
+      </param>
+      <expand macro="estimator_suboptions"/>
+    </conditional>
+  </xml>
+
+  <xml name="estimator_selector_fs"><!-- Same conditional as estimator_selector_all plus one extra module option that loads an estimator or pipeline from a zip dataset. The option value stays `customer_estimator` for compatibility; only the labels use "custom". -->
+    <conditional name="estimator_selector">
+      <param name="selected_module" type="select" label="Choose the module that contains target estimator:" >
+        <expand macro="estimator_module_options">
+            <option value="customer_estimator">Load a custom estimator</option>
+        </expand>
+      </param>
+      <expand macro="estimator_suboptions">
+        <when value="customer_estimator">
+            <param name="c_estimator" type="data" format="zip" label="Choose the dataset containing the custom estimator or pipeline:"/>
+        </when>
+      </expand>
     </conditional>
   </xml>
 
@@ -1373,7 +1540,7 @@
         <option value="SURFstar">SURFstar</option>
         <option value="MultiSURF">MultiSURF</option>
         <option value="MultiSURFstar">MultiSURFstar</option>
-        <option value="TuRF">TuRF</option>
+        <!--option value="TuRF">TuRF</option> -->
       </param>
       <when value="ReliefF">
         <expand macro="estimator_params_text"
@@ -1395,12 +1562,115 @@
         <expand macro="estimator_params_text"
               help="Default(=blank): discrete_threshold=10, n_features_to_select=10, verbose=False."/>
       </when>
-      <when value="TuRF">
+      <!--when value="TuRF">
         <expand macro="estimator_params_text"
               help="Default(=blank): core_algorithm='ReliefF', discrete_threshold=10, n_features_to_select=10, n_neighbors=100, pct=0.5, verbose=False."/>
+      </when> -->
+    </conditional>
+  </xml>
+
+  <xml name="imbalanced_learn_sampling"> <!-- Sampler chooser for imbalanced-learn: one when-branch per algorithm, each expanding estimator_params_text with a help string listing that sampler's defaults. -->
+    <conditional name="imblearn_selector">
+      <param name="select_algorithm" type="select" label="Choose the algorithm:">
+        <option value="under_sampling.ClusterCentroids" selected="true">under_sampling.ClusterCentroids</option>
+        <option value="under_sampling.CondensedNearestNeighbour">under_sampling.CondensedNearestNeighbour</option>
+        <option value="under_sampling.EditedNearestNeighbours">under_sampling.EditedNearestNeighbours</option>
+        <option value="under_sampling.RepeatedEditedNearestNeighbours">under_sampling.RepeatedEditedNearestNeighbours</option>
+        <option value="under_sampling.AllKNN">under_sampling.AllKNN</option>
+        <option value="under_sampling.InstanceHardnessThreshold">under_sampling.InstanceHardnessThreshold</option>
+        <option value="under_sampling.NearMiss">under_sampling.NearMiss</option>
+        <option value="under_sampling.NeighbourhoodCleaningRule">under_sampling.NeighbourhoodCleaningRule</option>
+        <option value="under_sampling.OneSidedSelection">under_sampling.OneSidedSelection</option>
+        <option value="under_sampling.RandomUnderSampler">under_sampling.RandomUnderSampler</option>
+        <option value="under_sampling.TomekLinks">under_sampling.TomekLinks</option>
+        <option value="over_sampling.ADASYN">over_sampling.ADASYN</option>
+        <option value="over_sampling.RandomOverSampler">over_sampling.RandomOverSampler</option>
+        <option value="over_sampling.SMOTE">over_sampling.SMOTE</option>
+        <option value="over_sampling.SVMSMOTE">over_sampling.SVMSMOTE</option>
+        <option value="over_sampling.BorderlineSMOTE">over_sampling.BorderlineSMOTE</option>
+        <option value="over_sampling.SMOTENC">over_sampling.SMOTENC</option>
+        <option value="combine.SMOTEENN">combine.SMOTEENN</option>
+        <option value="combine.SMOTETomek">combine.SMOTETomek</option>
+      </param>
+      <when value="under_sampling.ClusterCentroids">
+        <expand macro="estimator_params_text"
+              help="Default(=blank): sampling_strategy='auto', random_state=None, estimator=None, voting='auto'."/>
+      </when>
+      <when value="under_sampling.CondensedNearestNeighbour">
+        <expand macro="estimator_params_text"
+              help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=None, n_seeds_S=1."/>
+      </when>
+      <when value="under_sampling.EditedNearestNeighbours">
+        <expand macro="estimator_params_text"
+              help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=3, kind_sel='all'."/>
+      </when>
+      <when value="under_sampling.RepeatedEditedNearestNeighbours">
+        <expand macro="estimator_params_text"
+              help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=3, max_iter=100, kind_sel='all'."/>
+      </when>
+      <when value="under_sampling.AllKNN">
+        <expand macro="estimator_params_text"
+              help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=3, kind_sel='all', allow_minority=False."/>
+      </when>
+      <when value="under_sampling.InstanceHardnessThreshold">
+        <expand macro="estimator_params_text"
+              help="Default(=blank): estimator=None, sampling_strategy='auto', random_state=None, cv=5."/>
+      </when>
+      <when value="under_sampling.NearMiss">
+        <expand macro="estimator_params_text"
+              help="Default(=blank): sampling_strategy='auto', random_state=None, version=1, n_neighbors=3, n_neighbors_ver3=3."/>
+      </when>
+      <when value="under_sampling.NeighbourhoodCleaningRule">
+        <expand macro="estimator_params_text"
+              help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=3, kind_sel='all', threshold_cleaning=0.5."/>
+      </when>
+      <when value="under_sampling.OneSidedSelection">
+        <expand macro="estimator_params_text"
+              help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=None, n_seeds_S=1."/>
+      </when>
+      <when value="under_sampling.RandomUnderSampler">
+        <expand macro="estimator_params_text"
+              help="Default(=blank): sampling_strategy='auto', random_state=None, replacement=False."/>
+      </when>
+      <when value="under_sampling.TomekLinks">
+        <expand macro="estimator_params_text"
+              help="Default(=blank): sampling_strategy='auto', random_state=None."/>
+      </when>
+      <when value="over_sampling.ADASYN">
+        <expand macro="estimator_params_text"
+              help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=5."/>
+      </when>
+      <when value="over_sampling.RandomOverSampler">
+        <expand macro="estimator_params_text"
+              help="Default(=blank): sampling_strategy='auto', random_state=None."/>
+      </when>
+      <when value="over_sampling.SMOTE">
+        <expand macro="estimator_params_text"
+              help="Default(=blank): sampling_strategy='auto', random_state=None, k_neighbors=5."/>
+      </when>
+      <when value="over_sampling.SVMSMOTE">
+        <expand macro="estimator_params_text"
+              help="Default(=blank): sampling_strategy='auto', k_neighbors=5, m_neighbors=10, out_step=0.5, random_state=None, svm_estimator=None."/>
+      </when>
+      <when value="over_sampling.BorderlineSMOTE">
+        <expand macro="estimator_params_text"
+              help="Default(=blank): sampling_strategy='auto', k_neighbors=5, kind='borderline-1', m_neighbors=10, random_state=None."/>
+      </when>
+      <when value="over_sampling.SMOTENC">
+        <expand macro="estimator_params_text"
+              help="Default: categorical_features=[], sampling_strategy='auto', random_state=None, k_neighbors=5."/>
+      </when>
+      <when value="combine.SMOTEENN">
+        <expand macro="estimator_params_text"
+              help="Default(=blank): sampling_strategy='auto', random_state=None, smote=None, enn=None."/>
+      </when>
+      <when value="combine.SMOTETomek">
+        <expand macro="estimator_params_text"
+              help="Default(=blank): sampling_strategy='auto', random_state=None, smote=None, tomek=None."/>
       </when>
     </conditional>
   </xml>
+
   <!-- Outputs -->
 
   <xml name="output">
@@ -1498,4 +1768,19 @@
     </citation>
   </xml>
 
+    <xml name="imblearn_citation"> <!-- BibTeX citation entry for the imbalanced-learn JMLR paper. -->
+    <citation type="bibtex">
+      @article{JMLR:v18:16-365,
+        author  = {Guillaume  Lema{{\^i}}tre and Fernando Nogueira and Christos K. Aridas},
+        title   = {Imbalanced-learn: A Python Toolbox to Tackle the Curse of Imbalanced Datasets in Machine Learning},
+        journal = {Journal of Machine Learning Research},
+        year    = {2017},
+        volume  = {18},
+        number  = {17},
+        pages   = {1-5},
+        url     = {http://jmlr.org/papers/v18/16-365.html}
+      }
+    </citation>
+  </xml>
+
 </macros>
author	bgruening
date	Sun, 30 Dec 2018 01:57:11 -0500
parents	026667802750
children	ec25331946b8