diff generalized_linear.xml @ 35:602edec75e1d draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
author bgruening
date Tue, 13 Apr 2021 17:25:00 +0000
parents a8c7b9fa426c
children fe181d613429
line wrap: on
line diff
--- a/generalized_linear.xml	Thu Oct 01 19:58:28 2020 +0000
+++ b/generalized_linear.xml	Tue Apr 13 17:25:00 2021 +0000
@@ -1,19 +1,18 @@
-<tool id="sklearn_generalized_linear" name="Generalized linear models" version="@VERSION@">
+<tool id="sklearn_generalized_linear" name="Generalized linear models" version="@VERSION@" profile="20.05">
     <description>for classification and regression</description>
     <macros>
         <import>main_macros.xml</import>
     </macros>
-    <expand macro="python_requirements"/>
-    <expand macro="macro_stdio"/>
+    <expand macro="python_requirements" />
+    <expand macro="macro_stdio" />
     <version_command>echo "@VERSION@"</version_command>
     <command><![CDATA[
     python "$glm_script" '$inputs'
 ]]>
     </command>
     <configfiles>
-        <inputs name="inputs"/>
-        <configfile name="glm_script">
-<![CDATA[
+        <inputs name="inputs" />
+        <configfile name="glm_script"><![CDATA[
 import sys
 import json
 import numpy as np
@@ -69,7 +68,7 @@
                 <option value="Perceptron">Perceptron</option>
             </param>
             <when value="SGDClassifier">
-                <expand macro="sl_mixed_input"/>
+                <expand macro="sl_mixed_input" />
                 <section name="options" title="Advanced Options" expanded="False">
                     <expand macro="loss">
                         <option value="hinge" selected="true">hinge</option>
@@ -78,258 +77,276 @@
                         <option value="squared_hinge">squared hinge</option>
                         <option value="perceptron">perceptron</option>
                     </expand>
-                    <expand macro="penalty"/>
-                    <expand macro="alpha"/>
-                    <expand macro="l1_ratio"/>
-                    <expand macro="fit_intercept"/>
+                    <expand macro="penalty" />
+                    <expand macro="alpha" />
+                    <expand macro="l1_ratio" />
+                    <expand macro="fit_intercept" />
                     <expand macro="n_iter_no_change" />
-                    <expand macro="shuffle"/>
-                    <expand macro="epsilon"/>
-                    <expand macro="learning_rate_s" selected1="true"/>
-                    <expand macro="eta0"/>
-                    <expand macro="power_t"/>
+                    <expand macro="shuffle" />
+                    <expand macro="epsilon" />
+                    <expand macro="learning_rate_s" selected1="true" />
+                    <expand macro="eta0" />
+                    <expand macro="power_t" />
                     <!--class_weight-->
-                    <expand macro="warm_start" checked="false"/>
-                    <expand macro="random_state"/>
+                    <expand macro="warm_start" checked="false" />
+                    <expand macro="random_state" />
                     <!--average-->
                 </section>
             </when>
             <when value="SGDRegressor">
-                <expand macro="sl_mixed_input"/>
+                <expand macro="sl_mixed_input" />
                 <section name="options" title="Advanced Options" expanded="False">
-                    <expand macro="loss" select="true"/>
-                    <expand macro="penalty"/>
-                    <expand macro="alpha"/>
-                    <expand macro="l1_ratio"/>
-                    <expand macro="fit_intercept"/>
+                    <expand macro="loss" select="true" />
+                    <expand macro="penalty" />
+                    <expand macro="alpha" />
+                    <expand macro="l1_ratio" />
+                    <expand macro="fit_intercept" />
                     <expand macro="n_iter_no_change" />
-                    <expand macro="shuffle"/>
-                    <expand macro="epsilon"/>
-                    <expand macro="learning_rate_s" selected2="true"/>
-                    <expand macro="eta0" default_value="0.01"/>
-                    <expand macro="power_t" default_value="0.25"/>
-                    <expand macro="warm_start" checked="false"/>
-                    <expand macro="random_state"/>
+                    <expand macro="shuffle" />
+                    <expand macro="epsilon" />
+                    <expand macro="learning_rate_s" selected2="true" />
+                    <expand macro="eta0" default_value="0.01" />
+                    <expand macro="power_t" default_value="0.25" />
+                    <expand macro="warm_start" checked="false" />
+                    <expand macro="random_state" />
                     <!--average-->
                 </section>
             </when>
             <when value="LinearRegression">
-                <expand macro="sl_mixed_input"/>
+                <expand macro="sl_mixed_input" />
                 <section name="options" title="Advanced Options" expanded="False">
-                    <expand macro="fit_intercept"/>
-                    <expand macro="normalize"/>
-                    <expand macro="copy_X"/>
+                    <expand macro="fit_intercept" />
+                    <expand macro="normalize" />
+                    <expand macro="copy_X" />
                 </section>
             </when>
             <when value="RidgeClassifier">
-                <expand macro="sl_mixed_input"/>
+                <expand macro="sl_mixed_input" />
                 <section name="options" title="Advanced Options" expanded="False">
-                    <expand macro="ridge_params"/>
+                    <expand macro="ridge_params" />
                 </section>
             </when>
             <when value="Ridge">
-                <expand macro="sl_mixed_input"/>
+                <expand macro="sl_mixed_input" />
                 <section name="options" title="Advanced Options" expanded="False">
-                    <expand macro="ridge_params"/>
+                    <expand macro="ridge_params" />
                 </section>
             </when>
             <when value="LogisticRegression">
-                <expand macro="sl_mixed_input"/>
+                <expand macro="sl_mixed_input" />
                 <section name="options" title="Advanced Options" expanded="False">
-                    <expand macro="penalty"/>
-                    <param argument="dual" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Use dual formulation" help=" "/>
-                    <expand macro="tol" default_value="0.0001" help_text="Tolerance for stopping criteria. "/>
-                    <expand macro="C"/>
-                    <expand macro="fit_intercept"/>
-                    <expand macro="max_iter" default_value="100"/>
-                    <expand macro="warm_start" checked="false"/>
+                    <expand macro="penalty" />
+                    <param argument="dual" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Use dual formulation" help=" " />
+                    <expand macro="tol" default_value="0.0001" help_text="Tolerance for stopping criteria. " />
+                    <expand macro="C" />
+                    <expand macro="fit_intercept" />
+                    <expand macro="max_iter" default_value="100" />
+                    <expand macro="warm_start" checked="false" />
                     <param argument="solver" type="select" label="Optimization algorithm" help=" ">
                         <option value="liblinear" selected="true">liblinear</option>
                         <option value="sag">sag</option>
                         <option value="lbfgs">lbfgs</option>
                         <option value="newton-cg">newton-cg</option>
                     </param>
-                    <param argument="intercept_scaling" type="float" value="1" label="Intercept scaling factor" help="Useful only if solver is liblinear. "/>
+                    <param argument="intercept_scaling" type="float" value="1" label="Intercept scaling factor" help="Useful only if solver is liblinear. " />
                     <param argument="multi_class" type="select" label="Multiclass option" help="Works only for lbfgs solver. ">
                         <option value="ovr" selected="true">ovr</option>
                         <option value="multinomial">multinomial</option>
                     </param>
                     <!--class_weight-->
-                    <expand macro="random_state"/>
+                    <expand macro="random_state" />
                 </section>
             </when>
             <when value="LogisticRegressionCV">
-                <expand macro="sl_mixed_input"/>
+                <expand macro="sl_mixed_input" />
                 <section name="options" title="Advanced Options" expanded="False">
-                    <param argument="Cs" type="integer" value="10" label="Inverse of regularization strength" help="A grid of Cs values are chosen in a logarithmic scale between 1e-4 and 1e4. Like in support vector machines, smaller values specify stronger regularization. "/>
-                    <param argument="dual" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Use dual formulation" help=" "/>
-                    <param argument="cv" type="integer" optional="true" value="" label="Number of folds used in cross validation" help="If not set, the default cross-validation generator (Stratified K-Folds) is used. "/>
-                    <expand macro="penalty"/>
-                    <expand macro="tol" default_value="0.0001" help_text="Tolerance for stopping criteria. "/>
-                    <expand macro="fit_intercept"/>
-                    <expand macro="max_iter" default_value="100"/>
+                    <param argument="Cs" type="integer" value="10" label="Inverse of regularization strength" help="A grid of Cs values are chosen in a logarithmic scale between 1e-4 and 1e4. Like in support vector machines, smaller values specify stronger regularization. " />
+                    <param argument="dual" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Use dual formulation" help=" " />
+                    <param argument="cv" type="integer" optional="true" value="" label="Number of folds used in cross validation" help="If not set, the default cross-validation generator (Stratified K-Folds) is used. " />
+                    <expand macro="penalty" />
+                    <expand macro="tol" default_value="0.0001" help_text="Tolerance for stopping criteria. " />
+                    <expand macro="fit_intercept" />
+                    <expand macro="max_iter" default_value="100" />
                     <param argument="solver" type="select" label="Optimization algorithm" help=" ">
                         <option value="liblinear" selected="true">liblinear</option>
                         <option value="sag">sag</option>
                         <option value="lbfgs">lbfgs</option>
                         <option value="newton-cg">newton-cg</option>
                     </param>
-                    <param argument="intercept_scaling" type="float" value="1" label="Intercept scaling factor" help="Useful only if solver is liblinear. "/>
+                    <param argument="intercept_scaling" type="float" value="1" label="Intercept scaling factor" help="Useful only if solver is liblinear. " />
                     <param argument="multi_class" type="select" label="Multiclass option" help="Works only for lbfgs solver. ">
                         <option value="ovr" selected="true">ovr</option>
                         <option value="multinomial">multinomial</option>
                     </param>
-                    <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Average scores across all folds" help=" "/>
-                    <expand macro="random_state"/>
-                    <!--scoring=None>
-                    <class_weight=None-->
+                    <param argument="refit" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="true" label="Average scores across all folds" help=" " />
+                    <expand macro="random_state" />
+                    <!--scoring=None, class_weight=None-->
                 </section>
             </when>
             <when value="Perceptron">
-                <expand macro="sl_mixed_input"/>
+                <expand macro="sl_mixed_input" />
                 <section name="options" title="Advanced Options" expanded="False">
-                    <expand macro="penalty" default_value="none"/>
-                    <expand macro="alpha"/>
-                    <expand macro="fit_intercept"/>
+                    <expand macro="penalty" default_value="none" />
+                    <expand macro="alpha" />
+                    <expand macro="fit_intercept" />
                     <expand macro="n_iter_no_change" />
-                    <expand macro="shuffle"/>
-                    <expand macro="eta0" default_value="1"/>
-                    <expand macro="warm_start" checked="false"/>
-                    <expand macro="random_state" default_value="0"/>
+                    <expand macro="shuffle" />
+                    <expand macro="eta0" default_value="1" />
+                    <expand macro="warm_start" checked="false" />
+                    <expand macro="random_state" default_value="0" />
                     <!--class_weight=None-->
                 </section>
             </when>
         </expand>
     </inputs>
-    <expand macro="output"/>
+    <expand macro="output" />
     <tests>
         <test>
-            <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
-            <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
-            <param name="selected_column_selector_option" value="all_but_by_index_number"/>
-            <param name="col1" value="6"/>
-            <param name="col2" value="6"/>
-            <param name="selected_task" value="train"/>
-            <param name="selected_algorithm" value="SGDRegressor"/>
-            <param name="random_state" value="10"/>
-            <output name="outfile_fit" file="glm_model01" compare="sim_size" delta="5"/>
+            <param name="infile1" value="regression_train.tabular" ftype="tabular" />
+            <param name="infile2" value="regression_train.tabular" ftype="tabular" />
+            <param name="selected_column_selector_option" value="all_but_by_index_number" />
+            <param name="col1" value="6" />
+            <param name="col2" value="6" />
+            <param name="selected_task" value="train" />
+            <param name="selected_algorithm" value="SGDRegressor" />
+            <param name="random_state" value="10" />
+            <output name="outfile_fit" file="glm_model01" compare="sim_size" delta="5" />
         </test>
         <test>
-            <param name="infile_model" value="glm_model01" ftype="zip"/>
-            <param name="infile_data" value="regression_test.tabular" ftype="tabular"/>
-            <param name="selected_task" value="load"/>
-            <output name="outfile_predict" file="glm_result01" lines_diff="4"/>
+            <param name="infile_model" value="glm_model01" ftype="zip" />
+            <param name="infile_data" value="regression_test.tabular" ftype="tabular" />
+            <param name="selected_task" value="load" />
+            <output name="outfile_predict" file="glm_result01" lines_diff="4" />
         </test>
         <test>
-            <param name="infile1" value="train.tabular" ftype="tabular"/>
-            <param name="infile2" value="train.tabular" ftype="tabular"/>
-            <param name="col1" value="1,2,3,4"/>
-            <param name="col2" value="5"/>
-            <param name="selected_task" value="train"/>
-            <param name="selected_algorithm" value="SGDClassifier"/>
-            <param name="random_state" value="10"/>
-            <output name="outfile_fit" file="glm_model02" compare="sim_size" delta="5"/>
+            <param name="infile1" value="train.tabular" ftype="tabular" />
+            <param name="infile2" value="train.tabular" ftype="tabular" />
+            <param name="col1" value="1,2,3,4" />
+            <param name="col2" value="5" />
+            <param name="selected_task" value="train" />
+            <param name="selected_algorithm" value="SGDClassifier" />
+            <param name="random_state" value="10" />
+            <output name="outfile_fit" file="glm_model02" compare="sim_size" delta="5" />
         </test>
         <test>
-            <param name="infile_model" value="glm_model02" ftype="zip"/>
-            <param name="infile_data" value="test.tabular" ftype="tabular"/>
-            <param name="selected_task" value="load"/>
-            <output name="outfile_predict" file="glm_result02"/>
+            <param name="infile_model" value="glm_model02" ftype="zip" />
+            <param name="infile_data" value="test.tabular" ftype="tabular" />
+            <param name="selected_task" value="load" />
+            <output name="outfile_predict" file="glm_result02" />
+        </test>
+        <test>
+            <param name="infile1" value="train.tabular" ftype="tabular" />
+            <param name="infile2" value="train.tabular" ftype="tabular" />
+            <param name="col1" value="1,2,3,4" />
+            <param name="col2" value="5" />
+            <param name="selected_task" value="train" />
+            <param name="selected_algorithm" value="RidgeClassifier" />
+            <param name="random_state" value="10" />
+            <output name="outfile_fit" file="glm_model03" compare="sim_size" delta="5" />
         </test>
         <test>
-            <param name="infile1" value="train.tabular" ftype="tabular"/>
-            <param name="infile2" value="train.tabular" ftype="tabular"/>
-            <param name="col1" value="1,2,3,4"/>
-            <param name="col2" value="5"/>
-            <param name="selected_task" value="train"/>
-            <param name="selected_algorithm" value="RidgeClassifier"/>
-            <param name="random_state" value="10"/>
-            <output name="outfile_fit" file="glm_model03" compare="sim_size" delta="5"/>
+            <param name="infile_model" value="glm_model03" ftype="zip" />
+            <param name="infile_data" value="test.tabular" ftype="tabular" />
+            <param name="selected_task" value="load" />
+            <output name="outfile_predict" file="glm_result03" />
         </test>
         <test>
-            <param name="infile_model" value="glm_model03" ftype="zip"/>
-            <param name="infile_data" value="test.tabular" ftype="tabular"/>
-            <param name="selected_task" value="load"/>
-            <output name="outfile_predict" file="glm_result03"/>
+            <param name="infile1" value="regression_train.tabular" ftype="tabular" />
+            <param name="infile2" value="regression_train.tabular" ftype="tabular" />
+            <param name="col1" value="1,2,3,4,5" />
+            <param name="col2" value="6" />
+            <param name="selected_task" value="train" />
+            <param name="selected_algorithm" value="LinearRegression" />
+            <output name="outfile_fit" file="glm_model04" compare="sim_size" delta="5" />
         </test>
         <test>
-            <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
-            <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
-            <param name="col1" value="1,2,3,4,5"/>
-            <param name="col2" value="6"/>
-            <param name="selected_task" value="train"/>
-            <param name="selected_algorithm" value="LinearRegression"/>
-            <output name="outfile_fit" file="glm_model04" compare="sim_size" delta="5"/>
+            <param name="infile_model" value="glm_model04" ftype="zip" />
+            <param name="infile_data" value="regression_test.tabular" ftype="tabular" />
+            <param name="selected_task" value="load" />
+            <output name="outfile_predict" file="glm_result04" lines_diff="8" />
         </test>
         <test>
-            <param name="infile_model" value="glm_model04" ftype="zip"/>
-            <param name="infile_data" value="regression_test.tabular" ftype="tabular"/>
-            <param name="selected_task" value="load"/>
-            <output name="outfile_predict" file="glm_result04" lines_diff="8"/>
+            <param name="infile1" value="train.tabular" ftype="tabular" />
+            <param name="infile2" value="train.tabular" ftype="tabular" />
+            <param name="col1" value="1,2,3,4" />
+            <param name="col2" value="5" />
+            <param name="selected_task" value="train" />
+            <param name="selected_algorithm" value="LogisticRegression" />
+            <param name="random_state" value="10" />
+            <output name="outfile_fit" file="glm_model05" compare="sim_size" delta="5" />
         </test>
         <test>
-            <param name="infile1" value="train.tabular" ftype="tabular"/>
-            <param name="infile2" value="train.tabular" ftype="tabular"/>
-            <param name="col1" value="1,2,3,4"/>
-            <param name="col2" value="5"/>
-            <param name="selected_task" value="train"/>
-            <param name="selected_algorithm" value="LogisticRegression"/>
-            <param name="random_state" value="10"/>
-            <output name="outfile_fit" file="glm_model05" compare="sim_size" delta="5"/>
+            <param name="infile_model" value="glm_model05" ftype="zip" />
+            <param name="infile_data" value="test.tabular" ftype="tabular" />
+            <param name="selected_task" value="load" />
+            <output name="outfile_predict" file="glm_result05" />
         </test>
         <test>
-            <param name="infile_model" value="glm_model05" ftype="zip"/>
-            <param name="infile_data" value="test.tabular" ftype="tabular"/>
-            <param name="selected_task" value="load"/>
-            <output name="outfile_predict" file="glm_result05"/>
+            <param name="infile1" value="train.tabular" ftype="tabular" />
+            <param name="infile2" value="train.tabular" ftype="tabular" />
+            <param name="col1" value="1,2,3,4" />
+            <param name="col2" value="5" />
+            <param name="selected_task" value="train" />
+            <param name="selected_algorithm" value="LogisticRegressionCV" />
+            <param name="random_state" value="10" />
+            <output name="outfile_fit" file="glm_model06" compare="sim_size" delta="5" />
         </test>
         <test>
-            <param name="infile1" value="train.tabular" ftype="tabular"/>
-            <param name="infile2" value="train.tabular" ftype="tabular"/>
-            <param name="col1" value="1,2,3,4"/>
-            <param name="col2" value="5"/>
-            <param name="selected_task" value="train"/>
-            <param name="selected_algorithm" value="LogisticRegressionCV"/>
-            <param name="random_state" value="10"/>
-            <output name="outfile_fit" file="glm_model06" compare="sim_size" delta="5"/>
+            <param name="infile_model" value="glm_model06" ftype="zip" />
+            <param name="infile_data" value="test.tabular" ftype="tabular" />
+            <param name="selected_task" value="load" />
+            <output name="outfile_predict" file="glm_result06" />
         </test>
         <test>
-            <param name="infile_model" value="glm_model06" ftype="zip"/>
-            <param name="infile_data" value="test.tabular" ftype="tabular"/>
-            <param name="selected_task" value="load"/>
-            <output name="outfile_predict" file="glm_result06"/>
-        </test>
-        <test>
-            <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
-            <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
-            <param name="col1" value="1,2,3,4,5"/>
-            <param name="col2" value="6"/>
-            <param name="selected_task" value="train"/>
-            <param name="selected_algorithm" value="Ridge"/>
-            <param name="random_state" value="10"/>
-            <output name="outfile_fit" file="glm_model07" compare="sim_size" delta="5"/>
+            <param name="infile1" value="regression_train.tabular" ftype="tabular" />
+            <param name="infile2" value="regression_train.tabular" ftype="tabular" />
+            <param name="col1" value="1,2,3,4,5" />
+            <param name="col2" value="6" />
+            <param name="selected_task" value="train" />
+            <param name="selected_algorithm" value="Ridge" />
+            <param name="random_state" value="10" />
+            <output name="outfile_fit" file="glm_model07" compare="sim_size" delta="5" />
         </test>
         <test>
-            <param name="infile_model" value="glm_model07" ftype="zip"/>
-            <param name="infile_data" value="regression_test.tabular" ftype="tabular"/>
-            <param name="selected_task" value="load"/>
-            <output name="outfile_predict" file="glm_result07"/>
+            <param name="infile_model" value="glm_model07" ftype="zip" />
+            <param name="infile_data" value="regression_test.tabular" ftype="tabular" />
+            <param name="selected_task" value="load" />
+            <output name="outfile_predict">
+                <assert_contents>
+                    <has_n_columns n="6" />
+                    <has_text text="86.9702122735000" />
+                    <has_text text="-1.0173960197" />
+                    <has_text text="0.64184687433" />
+                    <has_text text="-0.621522971207000" />
+                    <has_text text="0.39001218449" />
+                    <has_text text="0.596382816494397" />
+                    <has_text text="-47.4101632272" />
+                    <has_text text="-0.732777468453000" />
+                    <has_text text="-1.0610977011" />
+                    <has_text text="-1.099948005770000" />
+                    <has_text text="0.58565796301" />
+                    <has_text text="0.262144044202223" />
+                    <has_text text="-206.99829512" />
+                    <has_text text="0.7057412304" />
+                    <has_text text="-1.332209237379999" />
+                </assert_contents>
+            </output>
         </test>
         <test>
-            <param name="infile1" value="train.tabular" ftype="tabular"/>
-            <param name="infile2" value="train.tabular" ftype="tabular"/>
-            <param name="col1" value="1,2,3,4"/>
-            <param name="col2" value="5"/>
-            <param name="selected_task" value="train"/>
-            <param name="selected_algorithm" value="Perceptron"/>
-            <param name="random_state" value="10"/>
-            <output name="outfile_fit" file="glm_model08" compare="sim_size" delta="5"/>
+            <param name="infile1" value="train.tabular" ftype="tabular" />
+            <param name="infile2" value="train.tabular" ftype="tabular" />
+            <param name="col1" value="1,2,3,4" />
+            <param name="col2" value="5" />
+            <param name="selected_task" value="train" />
+            <param name="selected_algorithm" value="Perceptron" />
+            <param name="random_state" value="10" />
+            <output name="outfile_fit" file="glm_model08" compare="sim_size" delta="5" />
         </test>
         <test>
-            <param name="infile_model" value="glm_model08" ftype="zip"/>
-            <param name="infile_data" value="test.tabular" ftype="tabular"/>
-            <param name="selected_task" value="load"/>
-            <output name="outfile_predict" file="glm_result08"/>
+            <param name="infile_model" value="glm_model08" ftype="zip" />
+            <param name="infile_data" value="test.tabular" ftype="tabular" />
+            <param name="selected_task" value="load" />
+            <output name="outfile_predict" file="glm_result08" />
         </test>
     </tests>
     <help><![CDATA[
@@ -396,6 +413,6 @@
  **3 - Prediction output**
  The tool predicts the class labels for new samples and adds them as the last column to the prediction dataset. The new dataset is then output as a tabular file. The prediction output format should look like the training dataset.
 
-    ]]></help>
-    <expand macro="sklearn_citation"/>
+    ]]>    </help>
+    <expand macro="sklearn_citation" />
 </tool>