Mercurial > repos > bgruening > sklearn_feature_selection
diff main_macros.xml @ 17:2bbbac61e48d draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
author | bgruening |
---|---|
date | Sun, 30 Dec 2018 01:57:11 -0500 |
parents | 026667802750 |
children | ec25331946b8 |
line wrap: on
line diff
--- a/main_macros.xml Thu Oct 11 03:34:39 2018 -0400 +++ b/main_macros.xml Sun Dec 30 01:57:11 2018 -0500 @@ -1,13 +1,13 @@ <macros> - <token name="@VERSION@">0.9</token> + <token name="@VERSION@">1.0</token> <xml name="python_requirements"> <requirements> <requirement type="package" version="3.6">python</requirement> - <requirement type="package" version="0.19.1">scikit-learn</requirement> - <requirement type="package" version="0.22.0">pandas</requirement> - <requirement type="package" version="0.72.1">xgboost</requirement> - <requirement type="package" version="0.9.12">asteval</requirement> + <requirement type="package" version="0.20.2">scikit-learn</requirement> + <requirement type="package" version="0.23.4">pandas</requirement> + <requirement type="package" version="0.80">xgboost</requirement> + <requirement type="package" version="0.9.13">asteval</requirement> <yield /> </requirements> </xml> @@ -244,7 +244,7 @@ <param argument="shuffle" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="@LABEL@" help="@HELP_TEXT@"/> </xml> - <xml name="random_state" token_default_value="" token_help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data. A fixed seed allows reproducible results."> + <xml name="random_state" token_default_value="" token_help_text="Integer number. The seed of the pseudo random number generator to use when shuffling the data. A fixed seed allows reproducible results. 
default=None."> <param argument="random_state" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Random seed number" help="@HELP_TEXT@"/> </xml> @@ -346,20 +346,20 @@ <xml name="samples_column_selector_options" token_column_option="selected_column_selector_option" token_col_name="col1" token_multiple="False" token_infile="infile1"> <param name="@COLUMN_OPTION@" type="select" label="Choose how to select data by column:"> <option value="by_index_number" selected="true">Select columns by column index number(s)</option> + <option value="all_but_by_index_number">All columns BUT by column index number(s)</option> <option value="by_header_name">Select columns by column header name(s)</option> - <option value="all_but_by_index_number">All columns but by column index number(s)</option> - <option value="all_but_by_header_name">All columns but by column header name(s)</option> + <option value="all_but_by_header_name">All columns BUT by column header name(s)</option> <option value="all_columns">All columns</option> </param> <when value="by_index_number"> <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/> </when> + <when value="all_but_by_index_number"> + <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/> + </when> <when value="by_header_name"> <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. For example: target1,target2"/> </when> - <when value="all_but_by_index_number"> - <param name="@COL_NAME@" multiple="@MULTIPLE@" type="data_column" data_ref="@INFILE@" label="Select target column(s):"/> - </when> <when value="all_but_by_header_name"> <param name="@COL_NAME@" type="text" value="" label="Type header name(s):" help="Comma-separated string. 
For example: target1,target2"/> </when> @@ -543,9 +543,18 @@ <!--param argument="precompute_distances"/--> <expand macro="random_state"/> <param argument="copy_x" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Use a copy of data for precomputing distances" help="Mofifying the original data introduces small numerical differences caused by subtracting and then adding the data mean."/> + <expand macro="kmeans_algorithm"/> </section> </xml> + <xml name="kmeans_algorithm"> + <param argument="algorithm" type="select" label="K-means algorithm to use:"> + <option value="auto" selected="true">auto</option> + <option value="full">full</option> + <option value="elkan">elkan</option> + </param> + </xml> + <xml name="birch_advanced_options"> <section name="options" title="Advanced Options" expanded="False"> <param argument="threshold" type="float" optional="true" value="0.5" label="Subcluster radius threshold" help="The radius of the subcluster obtained by merging a new sample; the closest subcluster should be less than the threshold to avoid a new subcluster."/> @@ -730,8 +739,8 @@ </param> <param argument="missing_values" type="text" optional="true" value="NaN" label="Placeholder for missing values" help="For missing values encoded as numpy.nan, use the string value “NaN”"/> - <param argument="axis" type="boolean" optional="true" truevalue="1" falsevalue="0" - label="Impute along axis = 1" help="If fasle, axis = 0 is selected for imputation. "/> + <!--param argument="axis" type="boolean" optional="true" truevalue="1" falsevalue="0" + label="Impute along axis = 1" help="If fasle, axis = 0 is selected for imputation. 
"/> --> <!--param argument="axis" type="select" optional="true" label="The axis along which to impute" help=" "> <option value="0" selected="true">Impute along columns</option> <option value="1">Impute along rows</option> @@ -802,136 +811,285 @@ </expand> </xml> - <xml name="fs_selectfrommodel_prefitted"> - <param name="input_mode" type="select" label="Construct a new estimator from a selection list?" > - <option value="new" selected="true">Yes</option> - <option value="prefitted">No. Load a prefitted estimator</option> - </param> - <when value="new"> - <expand macro="estimator_selector_all"/> - </when> - <when value="prefitted"> - <param name="fitted_estimator" type="data" format='zip' label="Load a prefitted estimator" /> - </when> + <xml name="cv_splitter"> + <option value="default" selected="true">default splitter</option> + <option value="KFold">KFold</option> + <option value="StratifiedKFold">StratifiedKFold</option> + <option value="LeaveOneOut">LeaveOneOut</option> + <option value="LeavePOut">LeavePOut</option> + <option value="RepeatedKFold">RepeatedKFold</option> + <option value="RepeatedStratifiedKFold">RepeatedStratifiedKFold</option> + <option value="ShuffleSplit">ShuffleSplit</option> + <option value="StratifiedShuffleSplit">StratifiedShuffleSplit</option> + <option value="TimeSeriesSplit">TimeSeriesSplit</option> + <option value="PredefinedSplit">PredefinedSplit</option> + <yield/> </xml> - <xml name="fs_selectfrommodel_no_prefitted"> - <param name="input_mode" type="select" label="Construct a new estimator from a selection list?" 
> - <option value="new" selected="true">Yes</option> - </param> - <when value="new"> - <expand macro="estimator_selector_all"/> + <xml name="cv_splitter_options"> + <when value="default"> + <expand macro="cv_n_splits"/> + </when> + <when value="KFold"> + <expand macro="cv_n_splits"/> + <expand macro="cv_shuffle"/> + <expand macro="random_state"/> + </when> + <when value="StratifiedKFold"> + <expand macro="cv_n_splits"/> + <expand macro="cv_shuffle"/> + <expand macro="random_state"/> + </when> + <when value="LeaveOneOut"> + </when> + <when value="LeavePOut"> + <param argument="p" type="integer" value="" label="p" help="Integer. Size of the test sets."/> + </when> + <when value="RepeatedKFold"> + <expand macro="cv_n_splits" value="5"/> + <param argument="n_repeats" type="integer" value="10" label="n_repeats" help="Number of times cross-validator needs to be repeated." /> + <expand macro="random_state" /> </when> + <when value="RepeatedStratifiedKFold"> + <expand macro="cv_n_splits" value="5"/> + <param argument="n_repeats" type="integer" value="10" label="n_repeats" help="Number of times cross-validator needs to be repeated." /> + <expand macro="random_state" /> + </when> + <when value="ShuffleSplit"> + <expand macro="cv_n_splits" value="10" help="Number of re-shuffling and splitting iterations."/> + <expand macro="cv_test_size" value="0.1" /> + <expand macro="random_state"/> + </when> + <when value="StratifiedShuffleSplit"> + <expand macro="cv_n_splits" value="10" help="Number of re-shuffling and splitting iterations."/> + <expand macro="cv_test_size" value="0.1" /> + <expand macro="random_state"/> + </when> + <when value="TimeSeriesSplit"> + <expand macro="cv_n_splits"/> + <param argument="max_train_size" type="integer" value="" optional="true" label="Maximum size of the training set" help="Maximum size for a single training set." 
/> + </when> + <when value="PredefinedSplit"> + <param argument="test_fold" type="text" value="" area="true" label="test_fold" help="List, e.g., [0, 1, -1, 1], represents two test sets, [X[0]] and [X[1], X[3]], X[2] is excluded from any test set due to '-1'."/> + </when> + <yield/> </xml> <xml name="cv"> - <param argument="cv" type="text" value="" optional="true" label="cv" help="Optional. Integer or evalable splitter object, e.g., StratifiedKFold(n_splits=3, shuffle=True, random_state=10). Leave blank for default." > - <sanitizer> - <valid initial="default"> - <add value="'"/> - </valid> - </sanitizer> - </param> + <conditional name="cv_selector"> + <param name="selected_cv" type="select" label="Select the cv splitter:"> + <expand macro="cv_splitter"> + <option value="GroupKFold">GroupKFold</option> + <option value="GroupShuffleSplit">GroupShuffleSplit</option> + <option value="LeaveOneGroupOut">LeaveOneGroupOut</option> + <option value="LeavePGroupsOut">LeavePGroupsOut</option> + </expand> + </param> + <expand macro="cv_splitter_options"> + <when value="GroupKFold"> + <expand macro="cv_n_splits"/> + <expand macro="cv_groups" /> + </when> + <when value="GroupShuffleSplit"> + <expand macro="cv_n_splits" value="5"/> + <expand macro="cv_test_size"/> + <expand macro="random_state"/> + <expand macro="cv_groups"/> + </when> + <when value="LeaveOneGroupOut"> + <expand macro="cv_groups"/> + </when> + <when value="LeavePGroupsOut"> + <param argument="n_groups" type="integer" value="" label="n_groups" help="Number of groups (p) to leave out in the test split." /> + <expand macro="cv_groups"/> + </when> + </expand> + </conditional> + </xml> + + <xml name="cv_reduced"> + <conditional name="cv_selector"> + <param name="selected_cv" type="select" label="Select the cv splitter:"> + <expand macro="cv_splitter"/> + </param> + <expand macro="cv_splitter_options"/> + </conditional> + </xml> + + <xml name="cv_n_splits" token_value="3" token_help="Number of folds. 
Must be at least 2."> + <param argument="n_splits" type="integer" value="@VALUE@" min="2" label="n_splits" help="@HELP@"/> + </xml> + + <xml name="cv_shuffle"> + <param argument="shuffle" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Whether to shuffle data before splitting" /> + </xml> + + <xml name="cv_test_size" token_value="0.2"> + <param argument="test_size" type="float" value="@VALUE@" min="0.0" label="Portion or number of the test set" help="0.0-1.0, proportion of the dataset to include in the test split; >1, integer only, the absolute number of test samples "/> + </xml> + + <xml name="cv_groups" > + <param argument="groups" type="text" value="" area="true" label="Groups" help="Group lables in a list. e.g., [1, 1, 2, 2, 3, 3, 3]"/> + </xml> + + <xml name="feature_selection_algorithms"> + <option value="SelectKBest" selected="true">SelectKBest - Select features according to the k highest scores</option> + <option value="GenericUnivariateSelect">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option> + <option value="SelectPercentile">SelectPercentile - Select features according to a percentile of the highest scores</option> + <option value="SelectFpr">SelectFpr - Filter: Select the p-values below alpha based on a FPR test</option> + <option value="SelectFdr">SelectFdr - Filter: Select the p-values for an estimated false discovery rate</option> + <option value="SelectFwe">SelectFwe - Filter: Select the p-values corresponding to Family-wise error rate</option> + <option value="VarianceThreshold">VarianceThreshold - Feature selector that removes all low-variance features</option> + <option value="SelectFromModel">SelectFromModel - Meta-transformer for selecting features based on importance weights</option> + <option value="RFE">RFE - Feature ranking with recursive feature elimination</option> + <option value="RFECV">RFECV - Feature ranking with recursive feature elimination and 
cross-validated selection of the best number of features</option> </xml> - <xml name="feature_selection_all"> + <xml name="feature_selection_algorithm_details"> + <when value="GenericUnivariateSelect"> + <expand macro="feature_selection_score_function" /> + <section name="options" title="Advanced Options" expanded="False"> + <param argument="mode" type="select" label="Feature selection mode"> + <option value="percentile">percentile</option> + <option value="k_best">k_best</option> + <option value="fpr">fpr</option> + <option value="fdr">fdr</option> + <option value="fwe">fwe</option> + </param> + <param argument="param" type="float" value="" optional="true" label="Parameter of the corresponding mode" help="float or int depending on the feature selection mode" /> + </section> + </when> + <when value="SelectPercentile"> + <expand macro="feature_selection_score_function" /> + <section name="options" title="Advanced Options" expanded="False"> + <param argument="percentile" type="integer" value="10" optional="True" label="Percent of features to keep" /> + </section> + </when> + <when value="SelectKBest"> + <expand macro="feature_selection_score_function" /> + <section name="options" title="Advanced Options" expanded="False"> + <param argument="k" type="integer" value="10" optional="True" label="Number of top features to select" help="No 'all' option is supported." 
/> + </section> + </when> + <when value="SelectFpr"> + <expand macro="feature_selection_score_function" /> + <section name="options" title="Advanced Options" expanded="False"> + <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest p-value for features to be kept."/> + </section> + </when> + <when value="SelectFdr"> + <expand macro="feature_selection_score_function" /> + <section name="options" title="Advanced Options" expanded="False"> + <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/> + </section> + </when> + <when value="SelectFwe"> + <expand macro="feature_selection_score_function" /> + <section name="options" title="Advanced Options" expanded="False"> + <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/> + </section> + </when> + <when value="VarianceThreshold"> + <section name="options" title="Options" expanded="False"> + <param argument="threshold" type="float" value="" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/> + </section> + </when> + </xml> + + <xml name="feature_selection_SelectFromModel"> + <when value="SelectFromModel"> + <conditional name="model_inputter"> + <param name="input_mode" type="select" label="Construct a new estimator from a selection list?" > + <option value="new" selected="true">Yes</option> + <option value="prefitted">No. 
Load a prefitted estimator</option> + </param> + <when value="new"> + <expand macro="estimator_selector_fs"/> + </when> + <when value="prefitted"> + <param name="fitted_estimator" type="data" format='zip' label="Load a prefitted estimator" /> + </when> + </conditional> + <expand macro="feature_selection_SelectFromModel_options"/> + </when> + </xml> + + <xml name="feature_selection_SelectFromModel_no_prefitted"> + <when value="SelectFromModel"> + <conditional name="model_inputter"> + <param name="input_mode" type="select" label="Construct a new estimator from a selection list?" > + <option value="new" selected="true">Yes</option> + </param> + <when value="new"> + <expand macro="estimator_selector_all"/> + </when> + </conditional> + <expand macro="feature_selection_SelectFromModel_options"/> + </when> + </xml> + + <xml name="feature_selection_SelectFromModel_options"> + <section name="options" title="Advanced Options" expanded="False"> + <param argument="threshold" type="text" value="" optional="true" label="threshold" help="The threshold value to use for feature selection. e.g. 'mean', 'median', '1.25*mean'." /> + <param argument="norm_order" type="integer" value="1" label="norm_order" help="Order of the norm used to filter the vectors of coefficients below threshold in the case where the coef_ attribute of the estimator is of dimension 2. " /> + <param argument="max_features" type="integer" value="" optional="true" label="The maximum number of features selected scoring above threshold" help="To disable threshold and only select based on max_features, set threshold=-np.inf."/> + </section> + </xml> + + <xml name="feature_selection_RFE"> + <when value="RFE"> + <yield/> + <section name="options" title="Advanced Options" expanded="False"> + <param argument="n_features_to_select" type="integer" value="" optional="true" label="n_features_to_select" help="The number of features to select. If None, half of the features are selected." 
/> + <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " /> + <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." /> + </section> + </when> + </xml> + + <xml name="feature_selection_RFECV"> + <when value="RFECV"> + <yield/> + <section name="options" title="Advanced Options" expanded="False"> + <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " /> + <param argument="min_features_to_select" type="integer" value="1" optional="true" label="The minimum number of features to be selected"/> + <expand macro="cv_reduced"/> + <expand macro="scoring_selection"/> + <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." /> + </section> + </when> + </xml> + + <xml name="feature_selection_pipeline"> + <!--compare to `feature_selection_fs`, no fitted estimator for SelectFromModel and no customer estimator for RFE and RFECV--> <conditional name="fs_algorithm_selector"> <param name="selected_algorithm" type="select" label="Select a feature selection algorithm"> - <option value="SelectKBest" selected="true">SelectKBest - Select features according to the k highest scores</option> - <option value="SelectFromModel">SelectFromModel - Meta-transformer for selecting features based on importance weights</option> - <option value="GenericUnivariateSelect">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option> - <option value="SelectPercentile">SelectPercentile - Select features according to a percentile of the highest scores</option> - <option value="SelectFpr">SelectFpr - Filter: Select the p-values below alpha based on a FPR test</option> - <option value="SelectFdr">SelectFdr - Filter: Select the p-values for an estimated false discovery rate</option> - <option value="SelectFwe">SelectFwe - Filter: Select the p-values corresponding to Family-wise error rate</option> - 
<option value="RFE">RFE - Feature ranking with recursive feature elimination</option> - <option value="RFECV">RFECV - Feature ranking with recursive feature elimination and cross-validated selection of the best number of features</option> - <option value="VarianceThreshold">VarianceThreshold - Feature selector that removes all low-variance features</option> + <expand macro="feature_selection_algorithms"/> </param> - <when value="SelectFromModel"> - <conditional name="model_inputter"> - <yield/> - </conditional> - <section name="options" title="Advanced Options" expanded="False"> - <param argument="threshold" type="text" value="" optional="true" label="threshold" help="The threshold value to use for feature selection. e.g. 'mean', 'median', '1.25*mean'." /> - <param argument="norm_order" type="integer" value="1" label="norm_order" help="Order of the norm used to filter the vectors of coefficients below threshold in the case where the coef_ attribute of the estimator is of dimension 2. " /> - </section> - </when> - <when value="GenericUnivariateSelect"> - <expand macro="feature_selection_score_function" /> - <section name="options" title="Advanced Options" expanded="False"> - <param argument="mode" type="select" label="Feature selection mode"> - <option value="percentile">percentile</option> - <option value="k_best">k_best</option> - <option value="fpr">fpr</option> - <option value="fdr">fdr</option> - <option value="fwe">fwe</option> - </param> - <param argument="param" type="float" value="" optional="true" label="Parameter of the corresponding mode" help="float or int depending on the feature selection mode" /> - </section> - </when> - <when value="SelectPercentile"> - <expand macro="feature_selection_score_function" /> - <section name="options" title="Advanced Options" expanded="False"> - <param argument="percentile" type="integer" value="10" optional="True" label="Percent of features to keep" /> - </section> - </when> - <when value="SelectKBest"> - <expand 
macro="feature_selection_score_function" /> - <section name="options" title="Advanced Options" expanded="False"> - <param argument="k" type="integer" value="10" optional="True" label="Number of top features to select" help="No 'all' option is supported." /> - </section> - </when> - <when value="SelectFpr"> - <expand macro="feature_selection_score_function" /> - <section name="options" title="Advanced Options" expanded="False"> - <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest p-value for features to be kept."/> - </section> - </when> - <when value="SelectFdr"> - <expand macro="feature_selection_score_function" /> - <section name="options" title="Advanced Options" expanded="False"> - <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/> - </section> - </when> - <when value="SelectFwe"> - <expand macro="feature_selection_score_function" /> - <section name="options" title="Advanced Options" expanded="False"> - <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/> - </section> - </when> - <when value="RFE"> + <expand macro="feature_selection_algorithm_details"/> + <expand macro="feature_selection_SelectFromModel_no_prefitted"/> + <expand macro="feature_selection_RFE"> + <expand macro="estimator_selector_all"/> + </expand> + <expand macro="feature_selection_RFECV"> <expand macro="estimator_selector_all"/> - <section name="options" title="Advanced Options" expanded="False"> - <param argument="n_features_to_select" type="integer" value="" optional="true" label="n_features_to_select" help="The number of features to select. If None, half of the features are selected." /> - <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. 
" /> - <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." /> - </section> - </when> - <when value="RFECV"> - <expand macro="estimator_selector_all"/> - <section name="options" title="Advanced Options" expanded="False"> - <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " /> - <expand macro="cv"/> - <expand macro="scoring_selection"/> - <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." /> - </section> - </when> - <when value="VarianceThreshold"> - <section name="options" title="Options" expanded="False"> - <param argument="threshold" type="float" value="" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/> - </section> - </when> - <!--when value="chi2"> - </when> - <when value="f_classif"> - </when> - <when value="f_regression"> - </when> - <when value="mutual_info_classif"> - </when> - <when value="mutual_info_regression"> - </when--> + </expand> + </conditional> + </xml> + + <xml name="feature_selection_fs"> + <conditional name="fs_algorithm_selector"> + <param name="selected_algorithm" type="select" label="Select a feature selection algorithm"> + <expand macro="feature_selection_algorithms"/> + </param> + <expand macro="feature_selection_algorithm_details"/> + <expand macro="feature_selection_SelectFromModel"/> + <expand macro="feature_selection_RFE"> + <expand macro="estimator_selector_fs"/> + </expand> + <expand macro="feature_selection_RFECV"> + <expand macro="estimator_selector_fs"/> + </expand> </conditional> </xml> @@ -945,21 +1103,6 @@ </param> </xml> - <xml name="feature_selection_output_mothods"> - <conditional name="output_method_selector"> - <param name="selected_method" type="select" label="Select an output method:"> - <option value="fit_transform">fit_transform - Fit to data, then transform it</option> - <option 
value="get_support">get_support - Get a mask, or integer index, of the features selected</option> - </param> - <when value="fit_transform"> - <!--**fit_params--> - </when> - <when value="get_support"> - <param name="indices" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Indices" help="If True, the return value will be an array of integers, rather than a boolean mask."/> - </when> - </conditional> - </xml> - <xml name="model_validation_common_options"> <expand macro="cv"/> <expand macro="verbose"/> @@ -1122,22 +1265,22 @@ <expand macro="pre_dispatch" value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/> <param argument="iid" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="iid" help="If True, data is identically distributed across the folds"/> <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset."/> - <param argument="error_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Raise fit error:" help="If false, the metric score is assigned to 0 if an error occurs in estimator fitting and FitFailedWarning is raised."/> + <param argument="error_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Raise fit error:" help="If false, the metric score is assigned to NaN if an error occurs in estimator fitting and FitFailedWarning is raised."/> <param argument="return_train_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="return_train_score" help=""/> </xml> - <xml name="estimator_selector_all"> - <conditional name="estimator_selector"> - <param name="selected_module" type="select" label="Choose the module that contains target estimator:" > - <option value="svm" selected="true">sklearn.svm</option> - 
<option value="linear_model">sklearn.linear_model</option> - <option value="ensemble">sklearn.ensemble</option> - <option value="naive_bayes">sklearn.naive_bayes</option> - <option value="tree">sklearn.tree</option> - <option value="neighbors">sklearn.neighbors</option> - <option value="xgboost">xgboost</option> - <!--more--> - </param> + <xml name="estimator_module_options"> + <option value="svm" selected="true">sklearn.svm</option> + <option value="linear_model">sklearn.linear_model</option> + <option value="ensemble">sklearn.ensemble</option> + <option value="naive_bayes">sklearn.naive_bayes</option> + <option value="tree">sklearn.tree</option> + <option value="neighbors">sklearn.neighbors</option> + <option value="xgboost">xgboost</option> + <yield/> + </xml> + + <xml name="estimator_suboptions"> <when value="svm"> <param name="selected_estimator" type="select" label="Choose estimator class:"> <option value="LinearSVC" selected="true">LinearSVC</option> @@ -1244,6 +1387,30 @@ </param> <expand macro="estimator_params_text"/> </when> + <yield/> + </xml> + + <xml name="estimator_selector_all"> + <conditional name="estimator_selector"> + <param name="selected_module" type="select" label="Choose the module that contains target estimator:" > + <expand macro="estimator_module_options"/> + </param> + <expand macro="estimator_suboptions"/> + </conditional> + </xml> + + <xml name="estimator_selector_fs"> + <conditional name="estimator_selector"> + <param name="selected_module" type="select" label="Choose the module that contains target estimator:" > + <expand macro="estimator_module_options"> + <option value="customer_estimator">Load a customer estimator</option> + </expand> + </param> + <expand macro="estimator_suboptions"> + <when value="customer_estimator"> + <param name="c_estimator" type="data" format="zip" label="Choose the dataset containing the customer estimator or pipeline:"/> + </when> + </expand> </conditional> </xml> @@ -1373,7 +1540,7 @@ <option 
value="SURFstar">SURFstar</option> <option value="MultiSURF">MultiSURF</option> <option value="MultiSURFstar">MultiSURFstar</option> - <option value="TuRF">TuRF</option> + <!--option value="TuRF">TuRF</option> --> </param> <when value="ReliefF"> <expand macro="estimator_params_text" @@ -1395,12 +1562,115 @@ <expand macro="estimator_params_text" help="Default(=blank): discrete_threshold=10, n_features_to_select=10, verbose=False."/> </when> - <when value="TuRF"> + <!--when value="TuRF"> <expand macro="estimator_params_text" help="Default(=blank): core_algorithm='ReliefF', discrete_threshold=10, n_features_to_select=10, n_neighbors=100, pct=0.5, verbose=False."/> + </when> --> + </conditional> + </xml> + <!-- Sampler chooser for imbalanced-learn (under_sampling, over_sampling, combine). Every <when> branch expands estimator_params_text, and its help string lists the default arguments of the selected sampler. max_iter is listed only for RepeatedEditedNearestNeighbours because plain EditedNearestNeighbours has no such argument. --> + <xml name="imbalanced_learn_sampling"> + <conditional name="imblearn_selector"> + <param name="select_algorithm" type="select" label="Choose the algorithm:"> + <option value="under_sampling.ClusterCentroids" selected="true">under_sampling.ClusterCentroids</option> + <option value="under_sampling.CondensedNearestNeighbour">under_sampling.CondensedNearestNeighbour</option> + <option value="under_sampling.EditedNearestNeighbours">under_sampling.EditedNearestNeighbours</option> + <option value="under_sampling.RepeatedEditedNearestNeighbours">under_sampling.RepeatedEditedNearestNeighbours</option> + <option value="under_sampling.AllKNN">under_sampling.AllKNN</option> + <option value="under_sampling.InstanceHardnessThreshold">under_sampling.InstanceHardnessThreshold</option> + <option value="under_sampling.NearMiss">under_sampling.NearMiss</option> + <option value="under_sampling.NeighbourhoodCleaningRule">under_sampling.NeighbourhoodCleaningRule</option> + <option value="under_sampling.OneSidedSelection">under_sampling.OneSidedSelection</option> + <option value="under_sampling.RandomUnderSampler">under_sampling.RandomUnderSampler</option> + <option value="under_sampling.TomekLinks">under_sampling.TomekLinks</option> + <option 
value="over_sampling.ADASYN">over_sampling.ADASYN</option> + <option value="over_sampling.RandomOverSampler">over_sampling.RandomOverSampler</option> + <option value="over_sampling.SMOTE">over_sampling.SMOTE</option> + <option value="over_sampling.SVMSMOTE">over_sampling.SVMSMOTE</option> + <option value="over_sampling.BorderlineSMOTE">over_sampling.BorderlineSMOTE</option> + <option value="over_sampling.SMOTENC">over_sampling.SMOTENC</option> + <option value="combine.SMOTEENN">combine.SMOTEENN</option> + <option value="combine.SMOTETomek">combine.SMOTETomek</option> + </param> + <when value="under_sampling.ClusterCentroids"> + <expand macro="estimator_params_text" + help="Default(=blank): sampling_strategy='auto', random_state=None, estimator=None, voting='auto'."/> + </when> + <when value="under_sampling.CondensedNearestNeighbour"> + <expand macro="estimator_params_text" + help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=None, n_seeds_S=1."/> + </when> + <when value="under_sampling.EditedNearestNeighbours"> + <expand macro="estimator_params_text" + help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=3, kind_sel='all'."/> + </when> + <when value="under_sampling.RepeatedEditedNearestNeighbours"> + <expand macro="estimator_params_text" + help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=3, max_iter=100, kind_sel='all'."/> + </when> + <when value="under_sampling.AllKNN"> + <expand macro="estimator_params_text" + help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=3, kind_sel='all', allow_minority=False."/> + </when> + <when value="under_sampling.InstanceHardnessThreshold"> + <expand macro="estimator_params_text" + help="Default(=blank): estimator=None, sampling_strategy='auto', random_state=None, cv=5."/> + </when> + <when value="under_sampling.NearMiss"> + <expand macro="estimator_params_text" + help="Default(=blank): 
sampling_strategy='auto', random_state=None, version=1, n_neighbors=3, n_neighbors_ver3=3."/> + </when> + <when value="under_sampling.NeighbourhoodCleaningRule"> + <expand macro="estimator_params_text" + help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=3, kind_sel='all', threshold_cleaning=0.5."/> + </when> + <when value="under_sampling.OneSidedSelection"> + <expand macro="estimator_params_text" + help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=None, n_seeds_S=1."/> + </when> + <when value="under_sampling.RandomUnderSampler"> + <expand macro="estimator_params_text" + help="Default(=blank): sampling_strategy='auto', random_state=None, replacement=False."/> + </when> + <when value="under_sampling.TomekLinks"> + <expand macro="estimator_params_text" + help="Default(=blank): sampling_strategy='auto', random_state=None."/> + </when> + <when value="over_sampling.ADASYN"> + <expand macro="estimator_params_text" + help="Default(=blank): sampling_strategy='auto', random_state=None, n_neighbors=5."/> + </when> + <when value="over_sampling.RandomOverSampler"> + <expand macro="estimator_params_text" + help="Default(=blank): sampling_strategy='auto', random_state=None."/> + </when> + <when value="over_sampling.SMOTE"> + <expand macro="estimator_params_text" + help="Default(=blank): sampling_strategy='auto', random_state=None, k_neighbors=5."/> + </when> + <when value="over_sampling.SVMSMOTE"> + <expand macro="estimator_params_text" + help="Default(=blank): sampling_strategy='auto', k_neighbors=5, m_neighbors=10, out_step=0.5, random_state=None, svm_estimator=None."/> + </when> + <when value="over_sampling.BorderlineSMOTE"> + <expand macro="estimator_params_text" + help="Default(=blank): sampling_strategy='auto', k_neighbors=5, kind='borderline-1', m_neighbors=10, random_state=None."/> + </when> + <when value="over_sampling.SMOTENC"> + <expand macro="estimator_params_text" + help="Default: categorical_features=[], 
sampling_strategy='auto', random_state=None, k_neighbors=5."/> + </when> + <when value="combine.SMOTEENN"> + <expand macro="estimator_params_text" + help="Default(=blank): sampling_strategy='auto', random_state=None, smote=None, enn=None."/> + </when> + <when value="combine.SMOTETomek"> + <expand macro="estimator_params_text" + help="Default(=blank): sampling_strategy='auto', random_state=None, smote=None, tomek=None."/> </when> </conditional> </xml> + <!-- Outputs --> <xml name="output"> @@ -1498,4 +1768,19 @@ </citation> </xml> + <xml name="imblearn_citation"> + <citation type="bibtex"> + @article{JMLR:v18:16-365, + author = {Guillaume Lema{{\^i}}tre and Fernando Nogueira and Christos K. Aridas}, + title = {Imbalanced-learn: A Python Toolbox to Tackle the Curse of Imbalanced Datasets in Machine Learning}, + journal = {Journal of Machine Learning Research}, + year = {2017}, + volume = {18}, + number = {17}, + pages = {1-5}, + url = {http://jmlr.org/papers/v18/16-365.html} + } + </citation> + </xml> + </macros>