Mercurial > repos > bgruening > sklearn_ensemble

diff ensemble.xml @ 35:19d6c2745d34 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
author: bgruening
date: Tue, 13 Apr 2021 17:40:39 +0000
parents: af0523c606a7
children: 6546d7c9f08b
--- a/ensemble.xml	Thu Oct 01 20:15:12 2020 +0000
+++ b/ensemble.xml	Tue Apr 13 17:40:39 2021 +0000
@@ -1,19 +1,19 @@
-<tool id="sklearn_ensemble" name="Ensemble methods" version="@VERSION@">
+<tool id="sklearn_ensemble" name="Ensemble methods" version="@VERSION@" profile="20.05">
     <description>for classification and regression</description>
     <macros>
         <import>main_macros.xml</import>
     </macros>
-    <expand macro="python_requirements"/>
-    <expand macro="macro_stdio"/>
+    <expand macro="python_requirements" />
+    <expand macro="macro_stdio" />
     <version_command>echo "@VERSION@"</version_command>
     <command><![CDATA[
     python "$ensemble_script" '$inputs'
 ]]>
     </command>
     <configfiles>
-        <inputs name="inputs"/>
+        <inputs name="inputs" />
         <configfile name="ensemble_script">
-<![CDATA[
+            <![CDATA[
 import json
 import numpy as np
 import pandas
@@ -99,98 +99,98 @@
                 <option value="GradientBoostingRegressor">Gradient Boosting Regressor</option>
             </param>
             <when value="RandomForestClassifier">
-                <expand macro="sl_mixed_input"/>
+                <expand macro="sl_mixed_input" />
                 <section name="options" title="Advanced Options" expanded="False">
-                    <expand macro="n_estimators" default_value="100"/>
-                    <expand macro="criterion"/>
-                    <expand macro="max_features"/>
-                    <expand macro="max_depth"/>
-                    <expand macro="min_samples_split"/>
-                    <expand macro="min_samples_leaf"/>
-                    <expand macro="min_weight_fraction_leaf"/>
-                    <expand macro="max_leaf_nodes"/>
-                    <expand macro="bootstrap"/>
-                    <expand macro="warm_start" checked="false"/>
-                    <expand macro="random_state"/>
-                    <expand macro="oob_score"/>
+                    <expand macro="n_estimators" default_value="100" />
+                    <expand macro="criterion" />
+                    <expand macro="max_features" />
+                    <expand macro="max_depth" />
+                    <expand macro="min_samples_split" />
+                    <expand macro="min_samples_leaf" />
+                    <expand macro="min_weight_fraction_leaf" />
+                    <expand macro="max_leaf_nodes" />
+                    <expand macro="bootstrap" />
+                    <expand macro="warm_start" checked="false" />
+                    <expand macro="random_state" />
+                    <expand macro="oob_score" />
                     <!--class_weight=None-->
                 </section>
             </when>
             <when value="AdaBoostClassifier">
-                <expand macro="sl_mixed_input"/>
+                <expand macro="sl_mixed_input" />
                 <section name="options" title="Advanced Options" expanded="False">
                     <!--base_estimator=None-->
-                    <expand macro="n_estimators" default_value="50"/>
-                    <expand macro="learning_rate"/>
-                    <param argument="algorithm" type="select" label="Boosting algorithm"  help=" ">
+                    <expand macro="n_estimators" default_value="50" />
+                    <expand macro="learning_rate" />
+                    <param argument="algorithm" type="select" label="Boosting algorithm" help=" ">
                         <option value="SAMME.R" selected="true">SAMME.R</option>
                         <option value="SAMME">SAMME</option>
                     </param>
-                    <expand macro="random_state"/>
+                    <expand macro="random_state" />
                 </section>
             </when>
             <when value="GradientBoostingClassifier">
-                <expand macro="sl_mixed_input"/>
+                <expand macro="sl_mixed_input" />
                 <section name="options" title="Advanced Options" expanded="False">
                     <!--base_estimator=None-->
                     <param argument="loss" type="select" label="Loss function">
                         <option value="deviance" selected="true">deviance - logistic regression with probabilistic outputs</option>
                         <option value="exponential">exponential - gradient boosting recovers the AdaBoost algorithm</option>
                     </param>
-                    <expand macro="learning_rate" default_value='0.1'/>
-                    <expand macro="n_estimators" default_value="100" help="The number of boosting stages to perform"/>
-                    <expand macro="max_depth" default_value="3" help="maximum depth of the individual regression estimators"/>
+                    <expand macro="learning_rate" default_value='0.1' />
+                    <expand macro="n_estimators" default_value="100" help="The number of boosting stages to perform" />
+                    <expand macro="max_depth" default_value="3" help="maximum depth of the individual regression estimators" />
                     <expand macro="criterion2">
                         <option value="friedman_mse" selected="true">friedman_mse - mean squared error with improvement score by Friedman</option>
                     </expand>
-                    <expand macro="min_samples_split" type="float"/>
-                    <expand macro="min_samples_leaf" type="float" label="The minimum number of samples required to be at a leaf node"/>
-                    <expand macro="min_weight_fraction_leaf"/>
-                    <expand macro="subsample"/>
-                    <expand macro="max_features"/>
-                    <expand macro="max_leaf_nodes"/>
-                    <expand macro="min_impurity_decrease"/>
-                    <expand macro="verbose"/>
-                    <expand macro="warm_start" checked="false"/>
-                    <expand macro="random_state"/>
-                    <expand macro="presort"/>
+                    <expand macro="min_samples_split" type="float" />
+                    <expand macro="min_samples_leaf" type="float" label="The minimum number of samples required to be at a leaf node" />
+                    <expand macro="min_weight_fraction_leaf" />
+                    <expand macro="subsample" />
+                    <expand macro="max_features" />
+                    <expand macro="max_leaf_nodes" />
+                    <expand macro="min_impurity_decrease" />
+                    <expand macro="verbose" />
+                    <expand macro="warm_start" checked="false" />
+                    <expand macro="random_state" />
+                    <expand macro="presort" />
                 </section>
             </when>
             <when value="RandomForestRegressor">
-                <expand macro="sl_mixed_input"/>
+                <expand macro="sl_mixed_input" />
                 <section name="options" title="Advanced Options" expanded="False">
-                    <expand macro="n_estimators" default_value="100"/>
-                    <expand macro="criterion2"/>
-                    <expand macro="max_features"/>
-                    <expand macro="max_depth"/>
-                    <expand macro="min_samples_split"/>
-                    <expand macro="min_samples_leaf"/>
-                    <expand macro="min_weight_fraction_leaf"/>
-                    <expand macro="max_leaf_nodes"/>
-                    <expand macro="min_impurity_decrease"/>
-                    <expand macro="bootstrap"/>
-                    <expand macro="oob_score"/>
-                    <expand macro="random_state"/>
-                    <expand macro="verbose"/>
-                    <expand macro="warm_start" checked="false"/>
+                    <expand macro="n_estimators" default_value="100" />
+                    <expand macro="criterion2" />
+                    <expand macro="max_features" />
+                    <expand macro="max_depth" />
+                    <expand macro="min_samples_split" />
+                    <expand macro="min_samples_leaf" />
+                    <expand macro="min_weight_fraction_leaf" />
+                    <expand macro="max_leaf_nodes" />
+                    <expand macro="min_impurity_decrease" />
+                    <expand macro="bootstrap" />
+                    <expand macro="oob_score" />
+                    <expand macro="random_state" />
+                    <expand macro="verbose" />
+                    <expand macro="warm_start" checked="false" />
                 </section>
             </when>
             <when value="AdaBoostRegressor">
-                <expand macro="sl_mixed_input"/>
+                <expand macro="sl_mixed_input" />
                 <section name="options" title="Advanced Options" expanded="False">
                     <!--base_estimator=None-->
-                    <expand macro="n_estimators" default_value="50"/>
-                    <expand macro="learning_rate"/>
-                    <param argument="loss" type="select" label="Loss function"  optional="true" help="Used when updating the weights after each boosting iteration. ">
+                    <expand macro="n_estimators" default_value="50" />
+                    <expand macro="learning_rate" />
+                    <param argument="loss" type="select" label="Loss function" optional="true" help="Used when updating the weights after each boosting iteration. ">
                         <option value="linear" selected="true">linear</option>
                         <option value="square">square</option>
                         <option value="exponential">exponential</option>
                     </param>
-                    <expand macro="random_state"/>
+                    <expand macro="random_state" />
                 </section>
             </when>
             <when value="GradientBoostingRegressor">
-                <expand macro="sl_mixed_input"/>
+                <expand macro="sl_mixed_input" />
                 <section name="options" title="Advanced Options" expanded="False">
                     <param argument="loss" type="select" label="Loss function">
                         <option value="ls" selected="true">ls - least squares regression</option>
@@ -198,132 +198,132 @@
                         <option value="huber">huber - combination of least squares regression and least absolute deviation</option>
                         <option value="quantile">quantile - use alpha to specify the quantile</option>
                     </param>
-                    <expand macro="learning_rate" default_value="0.1"/>
-                    <expand macro="n_estimators" default_value="100" help="The number of boosting stages to perform"/>
-                    <expand macro="max_depth" default_value="3" help="maximum depth of the individual regression estimators"/>
+                    <expand macro="learning_rate" default_value="0.1" />
+                    <expand macro="n_estimators" default_value="100" help="The number of boosting stages to perform" />
+                    <expand macro="max_depth" default_value="3" help="maximum depth of the individual regression estimators" />
                     <expand macro="criterion2">
                         <option value="friedman_mse" selected="true">friedman_mse - mean squared error with improvement score by Friedman</option>
                     </expand>
-                    <expand macro="min_samples_split" type="float"/>
-                    <expand macro="min_samples_leaf" type="float" label="The minimum number of samples required to be at a leaf node"/>
-                    <expand macro="min_weight_fraction_leaf"/>
-                    <expand macro="subsample"/>
-                    <expand macro="max_features"/>
-                    <expand macro="max_leaf_nodes"/>
-                    <expand macro="min_impurity_decrease"/>
+                    <expand macro="min_samples_split" type="float" />
+                    <expand macro="min_samples_leaf" type="float" label="The minimum number of samples required to be at a leaf node" />
+                    <expand macro="min_weight_fraction_leaf" />
+                    <expand macro="subsample" />
+                    <expand macro="max_features" />
+                    <expand macro="max_leaf_nodes" />
+                    <expand macro="min_impurity_decrease" />
                     <param argument="alpha" type="float" value="0.9" label="alpha" help="The alpha-quantile of the huber loss function and the quantile loss function" />
                     <!--base_estimator=None-->
-                    <expand macro="verbose"/>
-                    <expand macro="warm_start" checked="false"/>
-                    <expand macro="random_state"/>
-                    <expand macro="presort"/>
+                    <expand macro="verbose" />
+                    <expand macro="warm_start" checked="false" />
+                    <expand macro="random_state" />
+                    <expand macro="presort" />
                 </section>
             </when>
         </expand>
     </inputs>
 
-    <expand macro="output"/>
+    <expand macro="output" />
 
     <tests>
         <test>
-            <param name="infile1" value="train.tabular" ftype="tabular"/>
-            <param name="infile2" value="train.tabular" ftype="tabular"/>
-            <param name="col1" value="1,2,3,4"/>
-            <param name="col2" value="5"/>
-            <param name="selected_task" value="train"/>
-            <param name="selected_algorithm" value="RandomForestClassifier"/>
-            <param name="random_state" value="10"/>
-            <output name="outfile_fit" file="rfc_model01" compare="sim_size" delta="5"/>
+            <param name="infile1" value="train.tabular" ftype="tabular" />
+            <param name="infile2" value="train.tabular" ftype="tabular" />
+            <param name="col1" value="1,2,3,4" />
+            <param name="col2" value="5" />
+            <param name="selected_task" value="train" />
+            <param name="selected_algorithm" value="RandomForestClassifier" />
+            <param name="random_state" value="10" />
+            <output name="outfile_fit" file="rfc_model01" compare="sim_size" delta="5" />
         </test>
         <test>
-            <param name="infile_model" value="rfc_model01" ftype="zip"/>
-            <param name="infile_data" value="test.tabular" ftype="tabular"/>
-            <param name="selected_task" value="load"/>
-            <output name="outfile_predict" file="rfc_result01"/>
+            <param name="infile_model" value="rfc_model01" ftype="zip" />
+            <param name="infile_data" value="test.tabular" ftype="tabular" />
+            <param name="selected_task" value="load" />
+            <output name="outfile_predict" file="rfc_result01" />
         </test>
         <test>
-            <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
-            <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
-            <param name="col1" value="1,2,3,4,5"/>
-            <param name="col2" value="6"/>
-            <param name="selected_task" value="train"/>
-            <param name="selected_algorithm" value="RandomForestRegressor"/>
-            <param name="random_state" value="10"/>
-            <output name="outfile_fit" file="rfr_model01" compare="sim_size" delta="5"/>
+            <param name="infile1" value="regression_train.tabular" ftype="tabular" />
+            <param name="infile2" value="regression_train.tabular" ftype="tabular" />
+            <param name="col1" value="1,2,3,4,5" />
+            <param name="col2" value="6" />
+            <param name="selected_task" value="train" />
+            <param name="selected_algorithm" value="RandomForestRegressor" />
+            <param name="random_state" value="10" />
+            <output name="outfile_fit" file="rfr_model01" compare="sim_size" delta="5" />
         </test>
         <test>
-            <param name="infile_model" value="rfr_model01" ftype="zip"/>
-            <param name="infile_data" value="regression_test.tabular" ftype="tabular"/>
-            <param name="selected_task" value="load"/>
-            <output name="outfile_predict" file="rfr_result01"/>
+            <param name="infile_model" value="rfr_model01" ftype="zip" />
+            <param name="infile_data" value="regression_test.tabular" ftype="tabular" />
+            <param name="selected_task" value="load" />
+            <output name="outfile_predict" file="rfr_result01" />
         </test>
         <test>
-            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
-            <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
-            <param name="header1" value="True"/>
-            <param name="selected_column_selector_option" value="all_columns"/>
-            <param name="header2" value="True"/>
-            <param name="col2" value="1"/>
-            <param name="selected_task" value="train"/>
-            <param name="selected_algorithm" value="GradientBoostingRegressor"/>
-            <param name="max_features" value="number_input"/>
-            <param name="num_max_features" value="0.5"/>
-            <param name="random_state" value="42"/>
-            <output name="outfile_fit" file="gbr_model01" compare="sim_size" delta="5"/>
+            <param name="infile1" value="regression_X.tabular" ftype="tabular" />
+            <param name="infile2" value="regression_y.tabular" ftype="tabular" />
+            <param name="header1" value="True" />
+            <param name="selected_column_selector_option" value="all_columns" />
+            <param name="header2" value="True" />
+            <param name="col2" value="1" />
+            <param name="selected_task" value="train" />
+            <param name="selected_algorithm" value="GradientBoostingRegressor" />
+            <param name="max_features" value="number_input" />
+            <param name="num_max_features" value="0.5" />
+            <param name="random_state" value="42" />
+            <output name="outfile_fit" file="gbr_model01" compare="sim_size" delta="5" />
         </test>
         <test>
-            <param name="infile_model" value="gbr_model01" ftype="zip"/>
-            <param name="infile_data" value="regression_test_X.tabular" ftype="tabular"/>
-            <param name="selected_task" value="load"/>
-            <param name="header" value="True"/>
-            <output name="outfile_predict" file="gbr_prediction_result01.tabular"/>
+            <param name="infile_model" value="gbr_model01" ftype="zip" />
+            <param name="infile_data" value="regression_test_X.tabular" ftype="tabular" />
+            <param name="selected_task" value="load" />
+            <param name="header" value="True" />
+            <output name="outfile_predict" file="gbr_prediction_result01.tabular" />
         </test>
         <test>
-            <param name="infile1" value="train.tabular" ftype="tabular"/>
-            <param name="infile2" value="train.tabular" ftype="tabular"/>
-            <param name="col1" value="1,2,3,4"/>
-            <param name="col2" value="5"/>
-            <param name="selected_task" value="train"/>
-            <param name="selected_algorithm" value="GradientBoostingClassifier"/>
-            <output name="outfile_fit" file="gbc_model01" compare="sim_size" delta="5"/>
+            <param name="infile1" value="train.tabular" ftype="tabular" />
+            <param name="infile2" value="train.tabular" ftype="tabular" />
+            <param name="col1" value="1,2,3,4" />
+            <param name="col2" value="5" />
+            <param name="selected_task" value="train" />
+            <param name="selected_algorithm" value="GradientBoostingClassifier" />
+            <output name="outfile_fit" file="gbc_model01" compare="sim_size" delta="5" />
         </test>
         <test>
-            <param name="infile_model" value="gbc_model01" ftype="zip"/>
-            <param name="infile_data" value="test.tabular" ftype="tabular"/>
-            <param name="selected_task" value="load"/>
-            <output name="outfile_predict" file="gbc_result01"/>
+            <param name="infile_model" value="gbc_model01" ftype="zip" />
+            <param name="infile_data" value="test.tabular" ftype="tabular" />
+            <param name="selected_task" value="load" />
+            <output name="outfile_predict" file="gbc_result01" />
         </test>
         <test>
-            <param name="infile1" value="train.tabular" ftype="tabular"/>
-            <param name="infile2" value="train.tabular" ftype="tabular"/>
-            <param name="col1" value="1,2,3,4"/>
-            <param name="col2" value="5"/>
-            <param name="selected_task" value="train"/>
-            <param name="selected_algorithm" value="AdaBoostClassifier"/>
-            <param name="random_state" value="10"/>
-            <output name="outfile_fit" file="abc_model01" compare="sim_size" delta="5"/>
+            <param name="infile1" value="train.tabular" ftype="tabular" />
+            <param name="infile2" value="train.tabular" ftype="tabular" />
+            <param name="col1" value="1,2,3,4" />
+            <param name="col2" value="5" />
+            <param name="selected_task" value="train" />
+            <param name="selected_algorithm" value="AdaBoostClassifier" />
+            <param name="random_state" value="10" />
+            <output name="outfile_fit" file="abc_model01" compare="sim_size" delta="5" />
         </test>
         <test>
-            <param name="infile_model" value="abc_model01" ftype="zip"/>
-            <param name="infile_data" value="test.tabular" ftype="tabular"/>
-            <param name="selected_task" value="load"/>
-            <output name="outfile_predict" file="abc_result01"/>
+            <param name="infile_model" value="abc_model01" ftype="zip" />
+            <param name="infile_data" value="test.tabular" ftype="tabular" />
+            <param name="selected_task" value="load" />
+            <output name="outfile_predict" file="abc_result01" />
         </test>
         <test>
-            <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
-            <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
-            <param name="col1" value="1,2,3,4,5"/>
-            <param name="col2" value="6"/>
-            <param name="selected_task" value="train"/>
-            <param name="selected_algorithm" value="AdaBoostRegressor"/>
-            <param name="random_state" value="10"/>
-            <output name="outfile_fit" file="abr_model01" compare="sim_size" delta="5"/>
+            <param name="infile1" value="regression_train.tabular" ftype="tabular" />
+            <param name="infile2" value="regression_train.tabular" ftype="tabular" />
+            <param name="col1" value="1,2,3,4,5" />
+            <param name="col2" value="6" />
+            <param name="selected_task" value="train" />
+            <param name="selected_algorithm" value="AdaBoostRegressor" />
+            <param name="random_state" value="10" />
+            <output name="outfile_fit" file="abr_model01" compare="sim_size" delta="5" />
         </test>
         <test>
-            <param name="infile_model" value="abr_model01" ftype="zip"/>
-            <param name="infile_data" value="regression_test.tabular" ftype="tabular"/>
-            <param name="selected_task" value="load"/>
-            <output name="outfile_predict" file="abr_result01"/>
+            <param name="infile_model" value="abr_model01" ftype="zip" />
+            <param name="infile_data" value="regression_test.tabular" ftype="tabular" />
+            <param name="selected_task" value="load" />
+            <output name="outfile_predict" file="abr_result01" />
         </test>
     </tests>
     <help><![CDATA[
@@ -390,6 +390,6 @@
  **3 - Prediction output**
  The tool predicts the class labels for new samples and adds them as the last column to the prediction dataset. The new dataset then is output as a tabular file. The prediction output format should look like the training dataset.
 
-    ]]></help>
-    <expand macro="sklearn_citation"/>
+    ]]>    </help>
+    <expand macro="sklearn_citation" />
 </tool>
author	bgruening
date	Tue, 13 Apr 2021 17:40:39 +0000
parents	af0523c606a7
children	6546d7c9f08b