Mercurial > repos > bgruening > sklearn_data_preprocess
diff main_macros.xml @ 31:eb79bde99328 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 5b2ac730ec6d3b762faa9034eddd19ad1b347476"
author | bgruening |
---|---|
date | Mon, 16 Dec 2019 05:15:58 -0500 |
parents | 66df2aa6cd6b |
children | 1b5cd2d16fb1 |
line wrap: on
line diff
--- a/main_macros.xml Thu Nov 07 05:24:09 2019 -0500 +++ b/main_macros.xml Mon Dec 16 05:15:58 2019 -0500 @@ -1,12 +1,10 @@ <macros> - <token name="@VERSION@">1.0.7.12</token> - - <token name="@ENSEMBLE_VERSION@">0.2.0</token> + <token name="@VERSION@">1.0.8.1</token> <xml name="python_requirements"> <requirements> <requirement type="package" version="3.6">python</requirement> - <requirement type="package" version="0.7.12">Galaxy-ML</requirement> + <requirement type="package" version="0.8.1">Galaxy-ML</requirement> <yield/> </requirements> </xml> @@ -235,8 +233,8 @@ <param argument="fit_intercept" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Estimate the intercept" help="If false, the data is assumed to be already centered."/> </xml> - <xml name="n_iter" token_default_value="5" token_help_text="The number of passes over the training data (aka epochs). "> - <param argument="n_iter" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of iterations" help="@HELP_TEXT@"/> + <xml name="n_iter_no_change" token_default_value="5" token_help_text="Number of iterations with no improvement to wait before early stopping. 
"> + <param argument="n_iter_no_change" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of iterations" help="@HELP_TEXT@"/> </xml> <xml name="shuffle" token_checked="true" token_help_text=" " token_label="Shuffle data after each iteration"> @@ -763,6 +761,9 @@ <option value="MinMaxScaler">Minmax Scaler (Scales features to a range)</option> <option value="PolynomialFeatures">Polynomial Features (Generates polynomial and interaction features)</option> <option value="RobustScaler">Robust Scaler (Scales features using outlier-invariance statistics)</option> + <option value="QuantileTransformer">QuantileTransformer (Transform features using quantiles information)</option> + <option value="PowerTransformer">PowerTransformer (Apply a power transform featurewise to make data more Gaussian-like)</option> + <option value="KBinsDiscretizer">KBinsDiscretizer (Bin continuous data into intervals.)</option> </expand> </xml> @@ -837,6 +838,42 @@ label="Use a copy of data for inplace scaling" help=" "/> </section> </when> + <when value="QuantileTransformer"> + <section name="options" title="Advanced Options" expanded="False"> + <param name="n_quantiles" type="integer" value="1000" min="0" label="Number of quantiles to be computed" /> + <param name="output_distribution" type="select" label="Marginal distribution for the transformed data"> + <option value="uniform" selected="true">uniform</option> + <option value="normal">normal</option> + </param> + <param name="ignore_implicit_zeros" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Whether to discard sparse entries" help="Only applies to sparse matrices. 
If False, sparse entries are treated as zeros"/> + <param name="subsample" type="integer" value="100000" label="Maximum number of samples used to estimate the quantiles for computational efficiency" help="Note that the subsampling procedure may differ for value-identical sparse and dense matrices."/> + <expand macro="random_state" help_text="This is used by subsampling and smoothing noise"/> + </section> + </when> + <when value="PowerTransformer"> + <section name="options" title="Advanced Options" expanded="False"> + <param name="method" type="select" label="The power transform method"> + <option value="yeo-johnson" selected="true">yeo-johnson (works with positive and negative values)</option> + <option value="box-cox">box-cox (might perform better, but only works with strictly positive values)</option> + </param> + <param name="standardize" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Whether to apply zero-mean, unit-variance normalization to the transformed output."/> + </section> + </when> + <when value="KBinsDiscretizer"> + <section name="options" title="Advanced Options" expanded="False"> + <param name="n_bins" type="integer" value="5" min="2" label="The number of bins to produce"/> + <param name="encode" type="select" label="Method used to encode the transformed result"> + <option value="onehot" selected="true">onehot (encode the transformed result with one-hot encoding and return a sparse matrix)</option> + <option value="onehot-dense">onehot-dense (encode the transformed result with one-hot encoding and return a dense array)</option> + <option value="ordinal">ordinal (return the bin identifier encoded as an integer value)</option> + </param> + <param name="strategy" type="select" label="Strategy used to define the widths of the bins"> + <option value="uniform">uniform (all bins in each feature have identical widths)</option> + <option value="quantile" selected="true">quantile (all bins in each feature have the same number 
of points)</option> + <option value="kmeans">kmeans (values in each bin have the same nearest center of a 1D k-means cluster)</option> + </param> + </section> + </when> </expand> </xml> @@ -1261,6 +1298,7 @@ <option value="neg_mean_squared_log_error">Regression -- 'neg_mean_squared_log_error'</option> <option value="neg_median_absolute_error">Regression -- 'neg_median_absolute_error'</option> <option value="r2">Regression -- 'r2'</option> + <option value="max_error">Regression -- 'max_error'</option> <option value="binarize_auc_scorer">anomaly detection -- binarize_auc_scorer</option> <option value="binarize_average_precision_scorer">anomaly detection -- binarize_average_precision_scorer</option> </param> @@ -1291,6 +1329,7 @@ <when value="neg_mean_squared_log_error"><expand macro="secondary_scoring_selection_regression"/></when> <when value="neg_median_absolute_error"><expand macro="secondary_scoring_selection_regression"/></when> <when value="r2"><expand macro="secondary_scoring_selection_regression"/></when> + <when value="max_error"><expand macro="secondary_scoring_selection_regression"/></when> <when value="binarize_auc_scorer"><expand macro="secondary_scoring_selection_anormaly"/></when> <when value="binarize_average_precision_scorer"><expand macro="secondary_scoring_selection_anormaly"/></when> </conditional> @@ -1329,6 +1368,7 @@ <option value="neg_mean_squared_log_error">Regression -- 'neg_mean_squared_log_error'</option> <option value="neg_median_absolute_error">Regression -- 'neg_median_absolute_error'</option> <option value="r2">Regression -- 'r2'</option> + <option value="max_error">Regression -- 'max_error'</option> </param> </xml> @@ -1343,32 +1383,6 @@ <param argument="pre_dispatch" type="@TYPE@" value="@DEFAULT_VALUE@" optional="true" label="pre_dispatch" help="@HELP@"/> </xml> - <xml name="search_cv_estimator"> - <param name="infile_estimator" type="data" format="zip" label="Choose the dataset containing pipeline/estimator object"/> - <section 
name="search_params_builder" title="Search parameters Builder" expanded="true"> - <param name="infile_params" type="data" format="tabular" optional="true" label="Choose the dataset containing parameter names" help="This dataset could be the output of `get_params` in the `Estimator Attributes` tool."/> - <repeat name="param_set" min="1" max="30" title="Parameter settings for search:"> - <param name="sp_name" type="select" optional="true" label="Choose a parameter name (with current value)"> - <options from_dataset="infile_params" startswith="@"> - <column name="name" index="2"/> - <column name="value" index="1"/> - <filter type="unique_value" name="unique_param" column="1"/> - </options> - </param> - <param name="sp_list" type="text" value="" optional="true" label="Search list" help="list or array-like, for example: [1, 10, 100, 1000], [True, False] and ['auto', 'sqrt', None]. See `help` section for more examples"> - <sanitizer> - <valid initial="default"> - <add value="'"/> - <add value="&quot;"/> - <add value="["/> - <add value="]"/> - </valid> - </sanitizer> - </param> - </repeat> - </section> - </xml> - <xml name="estimator_and_hyperparameter"> <param name="infile_estimator" type="data" format="zip" label="Choose the dataset containing pipeline/estimator object"/> <section name="hyperparams_swapping" title="Hyperparameter Swapping" expanded="false"> @@ -1398,7 +1412,7 @@ <expand macro="model_validation_common_options"/> <!--expand macro="pre_dispatch" default_value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/--> <param argument="iid" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="iid" help="If True, data is identically distributed across the folds"/> - <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset. 
Be aware that `refit=True` invokes extra computation, but it's REQUIRED for outputting the best estimator!"/> + <!--param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset. Be aware that `refit=True` invokes extra computation, but it's REQUIRED for outputting the best estimator!"/> --> <param argument="error_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Raise fit error:" help="If false, the metric score is assigned to NaN if an error occurs in estimator fitting and FitFailedWarning is raised."/> <param argument="return_train_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="return_train_score" help=""/> </xml> @@ -1475,6 +1489,8 @@ <option value="GradientBoostingClassifier">GradientBoostingClassifier</option> <option value="GradientBoostingRegressor">GradientBoostingRegressor</option> <option value="IsolationForest">IsolationForest</option> + <option value="HistGradientBoostingClassifier">HistGradientBoostingClassifier</option> + <option value="HistGradientBoostingRegressor">HistGradientBoostingRegressor</option> <option value="RandomForestClassifier">RandomForestClassifier</option> <option value="RandomForestRegressor">RandomForestRegressor</option> <option value="RandomTreesEmbedding">RandomTreesEmbedding</option>