Mercurial > repos > bgruening > sklearn_data_preprocess

diff main_macros.xml @ 31:eb79bde99328 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 5b2ac730ec6d3b762faa9034eddd19ad1b347476"
author: bgruening
date: Mon, 16 Dec 2019 05:15:58 -0500
parents: 66df2aa6cd6b
children: 1b5cd2d16fb1
--- a/main_macros.xml	Thu Nov 07 05:24:09 2019 -0500
+++ b/main_macros.xml	Mon Dec 16 05:15:58 2019 -0500
@@ -1,12 +1,10 @@
 <macros>
-  <token name="@VERSION@">1.0.7.12</token>
-
-  <token name="@ENSEMBLE_VERSION@">0.2.0</token>
+  <token name="@VERSION@">1.0.8.1</token>
 
   <xml name="python_requirements">
       <requirements>
           <requirement type="package" version="3.6">python</requirement>
-          <requirement type="package" version="0.7.12">Galaxy-ML</requirement>
+          <requirement type="package" version="0.8.1">Galaxy-ML</requirement> <!-- NOTE(review): appears to be bumped together with the @VERSION@ token (1.0.x.y pairs with Galaxy-ML 0.x.y); confirm when changing either. -->
           <yield/>
       </requirements>
   </xml>
@@ -235,8 +233,8 @@
     <param argument="fit_intercept" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="@CHECKED@" label="Estimate the intercept" help="If false, the data is assumed to be already centered."/>
   </xml>
 
-  <xml name="n_iter" token_default_value="5" token_help_text="The number of passes over the training data (aka epochs). ">
-    <param argument="n_iter" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of iterations" help="@HELP_TEXT@"/>
+  <!-- Early-stopping patience parameter; default value and help text are overridable through the macro tokens. --> <xml name="n_iter_no_change" token_default_value="5" token_help_text="Number of iterations with no improvement to wait before early stopping. ">
+    <param argument="n_iter_no_change" type="integer" optional="true" value="@DEFAULT_VALUE@" label="Number of iterations with no improvement" help="@HELP_TEXT@"/>
   </xml>
 
   <xml name="shuffle" token_checked="true" token_help_text=" " token_label="Shuffle data after each iteration">
@@ -763,6 +761,9 @@
       <option value="MinMaxScaler">Minmax Scaler (Scales features to a range)</option>
       <option value="PolynomialFeatures">Polynomial Features (Generates polynomial and interaction features)</option>
       <option value="RobustScaler">Robust Scaler (Scales features using outlier-invariance statistics)</option>
+      <option value="QuantileTransformer">QuantileTransformer (Transform features using quantiles information)</option>
+      <option value="PowerTransformer">PowerTransformer (Apply a power transform featurewise to make data more Gaussian-like)</option>
+      <option value="KBinsDiscretizer">KBinsDiscretizer (Bin continuous data into intervals.)</option>
     </expand>
   </xml>
 
@@ -837,6 +838,42 @@
                   label="Use a copy of data for inplace scaling" help=" "/>
           </section>
       </when>
+      <when value="QuantileTransformer"> <!-- Parameter names follow the constructor arguments of sklearn.preprocessing.QuantileTransformer. -->
+          <section name="options" title="Advanced Options" expanded="False">
+              <param name="n_quantiles" type="integer" value="1000" min="1" label="Number of quantiles to be computed" help="Must be at least 1 and must not be greater than the subsample size set below."/>
+              <param name="output_distribution" type="select" label="Marginal distribution for the transformed data">
+                  <option value="uniform" selected="true">uniform</option>
+                  <option value="normal">normal</option>
+              </param>
+              <param name="ignore_implicit_zeros" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Whether to discard sparse entries" help="Only applies to sparse matrices. If False, sparse entries are treated as zeros"/>
+              <param name="subsample" type="integer" value="100000" min="1" label="Maximum number of samples used to estimate the quantiles for computational efficiency" help="Note that the subsampling procedure may differ for value-identical sparse and dense matrices."/>
+              <expand macro="random_state" help_text="This is used by subsampling and smoothing noise"/>
+          </section>
+      </when>
+      <when value="PowerTransformer"> <!-- Power transform settings: the method to apply and whether to standardize the output. -->
+          <section title="Advanced Options" name="options" expanded="False">
+              <param type="select" name="method" label="The power transform method">
+                  <option selected="true" value="yeo-johnson">yeo-johnson (works with positive and negative values)</option>
+                  <option value="box-cox">box-cox (might perform better, but only works with strictly positive values)</option>
+              </param>
+              <param type="boolean" name="standardize" checked="true" truevalue="booltrue" falsevalue="boolfalse" label="Whether to apply zero-mean, unit-variance normalization to the transformed output."/>
+          </section>
+      </when>
+      <when value="KBinsDiscretizer"> <!-- Binning settings: bin count, output encoding and bin-width strategy. -->
+          <section title="Advanced Options" name="options" expanded="False">
+              <param type="integer" name="n_bins" min="2" value="5" label="The number of bins to produce"/>
+              <param type="select" name="encode" label="Method used to encode the transformed result">
+                  <option selected="true" value="onehot">onehot (encode the transformed result with one-hot encoding and return a sparse matrix)</option>
+                  <option value="onehot-dense">onehot-dense (encode the transformed result with one-hot encoding and return a dense array)</option>
+                  <option value="ordinal">ordinal (return the bin identifier encoded as an integer value)</option>
+              </param>
+              <param type="select" name="strategy" label="Strategy used to define the widths of the bins">
+                  <option value="uniform">uniform (all bins in each feature have identical widths)</option>
+                  <option selected="true" value="quantile">quantile (all bins in each feature have the same number of points)</option>
+                  <option value="kmeans">kmeans (values in each bin have the same nearest center of a 1D k-means cluster)</option>
+              </param>
+          </section>
+      </when>
     </expand>
   </xml>
 
@@ -1261,6 +1298,7 @@
         <option value="neg_mean_squared_log_error">Regression -- 'neg_mean_squared_log_error'</option>
         <option value="neg_median_absolute_error">Regression -- 'neg_median_absolute_error'</option>
         <option value="r2">Regression -- 'r2'</option>
+        <option value="max_error">Regression -- 'max_error'</option>
         <option value="binarize_auc_scorer">anomaly detection -- binarize_auc_scorer</option>
         <option value="binarize_average_precision_scorer">anomaly detection -- binarize_average_precision_scorer</option>
       </param>
@@ -1291,6 +1329,7 @@
       <when value="neg_mean_squared_log_error"><expand macro="secondary_scoring_selection_regression"/></when>
       <when value="neg_median_absolute_error"><expand macro="secondary_scoring_selection_regression"/></when>
       <when value="r2"><expand macro="secondary_scoring_selection_regression"/></when>
+      <when value="max_error"><expand macro="secondary_scoring_selection_regression"/></when>
       <when value="binarize_auc_scorer"><expand macro="secondary_scoring_selection_anormaly"/></when>
       <when value="binarize_average_precision_scorer"><expand macro="secondary_scoring_selection_anormaly"/></when>
     </conditional>
@@ -1329,6 +1368,7 @@
       <option value="neg_mean_squared_log_error">Regression -- 'neg_mean_squared_log_error'</option>
       <option value="neg_median_absolute_error">Regression -- 'neg_median_absolute_error'</option>
       <option value="r2">Regression -- 'r2'</option>
+      <option value="max_error">Regression -- 'max_error'</option>
     </param>
   </xml>
 
@@ -1343,32 +1383,6 @@
     <param argument="pre_dispatch" type="@TYPE@" value="@DEFAULT_VALUE@" optional="true" label="pre_dispatch" help="@HELP@"/>
   </xml>
 
-  <xml name="search_cv_estimator">
-    <param name="infile_estimator" type="data" format="zip" label="Choose the dataset containing pipeline/estimator object"/>
-    <section name="search_params_builder" title="Search parameters Builder" expanded="true">
-      <param name="infile_params" type="data" format="tabular" optional="true" label="Choose the dataset containing parameter names" help="This dataset could be the output of `get_params` in the `Estimator Attributes` tool."/>
-      <repeat name="param_set" min="1" max="30" title="Parameter settings for search:">
-          <param name="sp_name" type="select" optional="true" label="Choose a parameter name (with current value)">
-            <options from_dataset="infile_params" startswith="@">
-              <column name="name" index="2"/>
-              <column name="value" index="1"/>
-              <filter type="unique_value" name="unique_param" column="1"/>
-            </options>
-          </param>
-          <param name="sp_list" type="text" value="" optional="true" label="Search list" help="list or array-like, for example: [1, 10, 100, 1000], [True, False] and ['auto', 'sqrt', None]. See `help` section for more examples">
-            <sanitizer>
-              <valid initial="default">
-                <add value="&apos;"/>
-                <add value="&quot;"/>
-                <add value="["/>
-                <add value="]"/>
-              </valid>
-            </sanitizer>
-          </param>
-      </repeat>
-    </section>
-  </xml>
-
   <xml name="estimator_and_hyperparameter">
     <param name="infile_estimator" type="data" format="zip" label="Choose the dataset containing pipeline/estimator object"/>
     <section name="hyperparams_swapping" title="Hyperparameter Swapping" expanded="false">
@@ -1398,7 +1412,7 @@
       <expand macro="model_validation_common_options"/>
       <!--expand macro="pre_dispatch" default_value="2*n_jobs" help="Controls the number of jobs that get dispatched during parallel execution"/-->
       <param argument="iid" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="iid" help="If True, data is identically distributed across the folds"/>
-      <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset. Be aware that `refit=True` invokes extra computation, but it's REQUIRED for outputting the best estimator!"/>
+      <!--param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="refit" help="Refit an estimator using the best found parameters on the whole dataset. Be aware that `refit=True` invokes extra computation, but it's REQUIRED for outputting the best estimator!"/> -->
       <param argument="error_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Raise fit error:" help="If false, the metric score is assigned to NaN if an error occurs in estimator fitting and FitFailedWarning is raised."/>
       <param argument="return_train_score" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="return_train_score" help=""/>
   </xml>
@@ -1475,6 +1489,8 @@
           <option value="GradientBoostingClassifier">GradientBoostingClassifier</option>
           <option value="GradientBoostingRegressor">GradientBoostingRegressor</option>
           <option value="IsolationForest">IsolationForest</option>
+          <option value="HistGradientBoostingClassifier">HistGradientBoostingClassifier</option>
+          <option value="HistGradientBoostingRegressor">HistGradientBoostingRegressor</option>
           <option value="RandomForestClassifier">RandomForestClassifier</option>
           <option value="RandomForestRegressor">RandomForestRegressor</option>
           <option value="RandomTreesEmbedding">RandomTreesEmbedding</option>
author	bgruening
date	Mon, 16 Dec 2019 05:15:58 -0500
parents	66df2aa6cd6b
children	1b5cd2d16fb1