diff model_validation.xml @ 0:333507faecab draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2e1e78576b38110cf5b1f2ed83b08b9c3a6cbfee
author bgruening
date Sat, 28 Apr 2018 18:10:26 -0400
parents
children dd502cb0d567
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/model_validation.xml	Sat Apr 28 18:10:26 2018 -0400
@@ -0,0 +1,302 @@
+<tool id="sklearn_model_validation" name="Model Validation" version="@VERSION@">
+    <description>evaluates estimator performance by cross-validation</description>
+    <macros>
+        <import>main_macros.xml</import>
+    </macros>
+    <expand macro="python_requirements"/>
+    <expand macro="macro_stdio"/>
+    <version_command>echo "@VERSION@"</version_command>
+    <command>
+        <![CDATA[
+        python "$sklearn_model_validation_script" '$inputs'
+        ]]>
+    </command>
+    <configfiles>
+        <inputs name="inputs" />
+        <configfile name="sklearn_model_validation_script">
+            <![CDATA[
+import sys
+import json
+import pandas
+import pickle
+import numpy as np
+import sklearn.model_selection
+from sklearn import svm, linear_model, ensemble
+
+@COLUMNS_FUNCTION@
+
+input_json_path = sys.argv[1]
+params = json.load(open(input_json_path, "r"))
+
+input_type = params["input_options"]["selected_input"]
+if input_type=="tabular":
+    header = 'infer' if params["input_options"]["header1"] else None
+    X = read_columns(
+            "$input_options.infile1",
+            "$input_options.col1",
+            sep='\t',
+            header=header,
+            parse_dates=True
+    )
+else:
+    X = mmread(open("$input_options.infile1", 'r'))
+
+header = 'infer' if params["input_options"]["header2"] else None
+y = read_columns(
+        "$input_options.infile2",
+        "$input_options.col2",
+        sep='\t',
+        header=header,
+        parse_dates=True
+)
+y=y.ravel()
+
+validator = params["model_validation_functions"]["selected_function"]
+validator = getattr(sklearn.model_selection, validator)
+options = params["model_validation_functions"]["options"]
+if 'scoring' in options and options['scoring'] == '':
+    options['scoring'] = None
+
+estimator=params["model_validation_functions"]["estimator"]
+if params["model_validation_functions"]["extra_estimator"]["has_estimator"] == 'no':
+    estimator = params["model_validation_functions"]["extra_estimator"]["new_estimator"]
+estimator = eval(estimator.replace('__dq__', '"').replace("__sq__","'"))
+
+#if $model_validation_functions.selected_function == 'cross_validate':
+res = validator(estimator, X, y, **options)
+rval = res["$model_validation_functions.return_type"]
+
+#elif $model_validation_functions.selected_function == 'learning_curve':
+options['train_sizes'] = eval(options['train_sizes'])
+train_sizes_abs, train_scores, test_scores = validator(estimator, X, y, **options)
+rval = eval("$model_validation_functions.return_type")
+
+#elif $model_validation_functions.selected_function == 'permutation_test_score':
+score, permutation_scores, pvalue = validator(estimator, X, y, **options)
+rval = eval("$model_validation_functions.return_type")
+if "$model_validation_functions.return_type" in ["score", "pvalue"]:
+    rval = [rval]
+
+#elif $model_validation_functions.selected_function == 'validation_curve':
+options['param_range'] = eval(options['param_range'])
+train_scores, test_scores = validator(estimator, X, y, **options)
+rval = eval("$model_validation_functions.return_type")
+
+#else:
+rval = validator(estimator, X, y, **options)
+#end if
+
+rval = pandas.DataFrame(rval)
+rval.to_csv(path_or_buf="$outfile", sep='\t', header=False, index=False)
+
+            ]]>
+        </configfile>
+    </configfiles>
+    <inputs>
+        <conditional name="model_validation_functions">
+            <param name="selected_function" type="select" label="Select a model validation function">
+                <option value="cross_validate">cross_validate - Evaluate metric(s) by cross-validation and also record fit/score times</option>
+                <option value="cross_val_predict">cross_val_predict - Generate cross-validated estimates for each input data point</option>
+                <option value="cross_val_score">cross_val_score - Evaluate a score by cross-validation</option>
+                <option value="learning_curve">learning_curve - Learning curve</option>
+                <option value="permutation_test_score">permutation_test_score - Evaluate the significance of a cross-validated score with permutations</option>
+                <option value="validation_curve">validation_curve - Validation curve</option>
+            </param>
+            <when value="cross_validate">
+                <expand macro="feature_selection_estimator" />
+                <conditional name="extra_estimator">
+                    <expand macro="feature_selection_extra_estimator" />
+                    <expand macro="feature_selection_estimator_choices" />
+                </conditional>
+                <section name="options" title="Other Options" expanded="false">
+                    <!--groups-->
+                    <expand macro="model_validation_common_options"/>
+                    <expand macro="scoring"/>
+                    <!--fit_params-->
+                    <expand macro="pre_dispatch"/>
+                </section>
+                <param name="return_type" type="select" label="Select a return type">
+                    <option value="test_score" selected="true">test_score</option>
+                    <option value="train_score">train_score</option>
+                    <option value="fit_time">fit_time</option>
+                    <option value="score_time">score_time</option>
+                </param>
+            </when>
+            <when value="cross_val_predict">
+                <expand macro="feature_selection_estimator" />
+                <conditional name="extra_estimator">
+                    <expand macro="feature_selection_extra_estimator" />
+                    <expand macro="feature_selection_estimator_choices" />
+                </conditional>
+                <section name="options" title="Other Options" expanded="false">
+                    <!--groups-->
+                    <param argument="cv" type="integer" value="" optional="true" label="cv" help="The number of folds in a (Stratified)KFold" />
+                    <expand macro="n_jobs"/>
+                    <expand macro="verbose"/>
+                    <!--fit_params-->
+                    <param argument="pre_dispatch" type="integer" value="" optional="true" label="pre_dispatch" help="Controls the number of jobs that get dispatched during parallel execution" />
+                    <param argument="method" type="select" label="Invokes the passed method name of the passed estimator">
+                        <option value="predict" selected="true">predict</option>
+                        <option value="predict_proba">predict_proba</option>
+                    </param>
+                </section>
+            </when>
+            <when value="cross_val_score">
+                <expand macro="feature_selection_estimator" />
+                <conditional name="extra_estimator">
+                    <expand macro="feature_selection_extra_estimator" />
+                    <expand macro="feature_selection_estimator_choices" />
+                </conditional>
+                <section name="options" title="Other Options" expanded="false">
+                    <!--groups-->
+                    <expand macro="model_validation_common_options"/>
+                    <expand macro="scoring"/>
+                    <!--fit_params-->
+                    <expand macro="pre_dispatch"/>
+                </section>
+            </when>
+            <when value="learning_curve">
+                <expand macro="feature_selection_estimator" />
+                <conditional name="extra_estimator">
+                    <expand macro="feature_selection_extra_estimator" />
+                    <expand macro="feature_selection_estimator_choices" />
+                </conditional>
+                <section name="options" title="Other Options" expanded="false">
+                    <!--groups-->
+                    <expand macro="model_validation_common_options"/>
+                    <param argument="train_sizes" type="text" value="np.linspace(0.1, 1.0, 5)" label="train_sizes" help="Relative or absolute numbers of training examples that will be used to generate the learning curve"/>
+                    <expand macro="scoring"/>
+                    <param argument="exploit_incremental_learning" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="exploit_incremental_learning" help="Whether to apply incremental learning to speed up fitting of the estimator if supported"/>
+                    <expand macro="pre_dispatch"/>
+                    <expand macro="shuffle" checked="false" label="shuffle" help="Whether to shuffle training data before taking prefixes"/>
+                    <expand macro="random_state"/>
+                </section>
+                <param name="return_type" type="select" label="Select a return type">
+                    <option value="train_sizes_abs" selected="true">train_sizes_abs</option>
+                    <option value="train_scores">train_scores</option>
+                    <option value="test_scores">test_scores</option>
+                </param>
+            </when>
+            <when value="permutation_test_score">
+                <expand macro="feature_selection_estimator" />
+                <conditional name="extra_estimator">
+                    <expand macro="feature_selection_extra_estimator" />
+                    <expand macro="feature_selection_estimator_choices" />
+                </conditional>
+                <section name="options" title="Other Options" expanded="false">
+                    <!--groups-->
+                    <expand macro="model_validation_common_options"/>
+                    <expand macro="scoring"/>
+                    <param name="n_permutations" type="integer" value="100" optional="true" label="n_permutations" help="Number of times to permute y"/>
+                    <expand macro="random_state"/>
+                </section>
+                <param name="return_type" type="select" label="Select a return type">
+                    <option value="score" selected="true">score</option>
+                    <option value="permutation_scores">permutation_scores</option>
+                    <option value="pvalue">pvalue</option>
+                </param>
+            </when>
+            <when value="validation_curve">
+                <expand macro="feature_selection_estimator" />
+                <conditional name="extra_estimator">
+                    <expand macro="feature_selection_extra_estimator" />
+                    <expand macro="feature_selection_estimator_choices" />
+                </conditional>
+                <section name="options" title="Other Options" expanded="false">
+                    <param name="param_name" type="text" value="gamma" label="param_name" help="Name of the parameter that will be varied"/>
+                    <param name="param_range" type="text" value="np.logspace(-6, -1, 5)" label="param_range" help="The values of the parameter that will be evaluated."/>
+                    <!--groups-->                    
+                    <expand macro="model_validation_common_options"/>
+                    <expand macro="scoring"/>
+                    <expand macro="pre_dispatch"/>
+                </section>
+                <param name="return_type" type="select" label="Select a return type">
+                    <option value="train_scores" selected="true">train_scores</option>
+                    <option value="test_scores">test_scores</option>
+                </param>
+            </when>
+        </conditional>
+        <expand macro="sl_mixed_input"/>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="outfile"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="selected_function" value="cross_validate"/>
+            <param name="estimator" value="linear_model.LassoCV()"/>
+            <param name="has_estimator" value="yes"/>
+            <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
+            <param name="col1" value="1,2,3,4,5"/>
+            <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
+            <param name="col2" value="6"/>
+            <output name="outfile" file="mv_result01.tabular"/>
+        </test>
+        <test>
+            <param name="selected_function" value="cross_val_predict"/>
+            <param name="estimator" value="linear_model.LassoCV()"/>
+            <param name="has_estimator" value="yes"/>
+            <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
+            <param name="col1" value="1,2,3,4,5"/>
+            <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
+            <param name="col2" value="6"/>
+            <output name="outfile" file="mv_result02.tabular"/>
+        </test>
+        <test>
+            <param name="selected_function" value="cross_val_score"/>
+            <param name="estimator" value="linear_model.LassoCV()"/>
+            <param name="has_estimator" value="yes"/>
+            <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
+            <param name="col1" value="1,2,3,4,5"/>
+            <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
+            <param name="col2" value="6"/>
+            <output name="outfile" file="mv_result03.tabular"/>
+        </test>
+        <test>
+            <param name="selected_function" value="learning_curve"/>
+            <param name="estimator" value="linear_model.LassoCV()"/>
+            <param name="has_estimator" value="yes"/>
+            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
+            <param name="header1" value="true" />
+            <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/>
+            <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
+            <param name="header2" value="true" />
+            <param name="col2" value="1"/>
+            <output name="outfile" file="mv_result04.tabular"/>
+        </test>
+        <test>
+            <param name="selected_function" value="permutation_test_score"/>
+            <param name="estimator" value="linear_model.LassoCV()"/>
+            <param name="has_estimator" value="yes"/>
+            <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
+            <param name="col1" value="1,2,3,4,5"/>
+            <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
+            <param name="col2" value="6"/>
+            <output name="outfile" file="mv_result05.tabular"/>
+        </test>
+        <test>
+            <param name="selected_function" value="validation_curve"/>
+            <param name="estimator" value="svm.SVC(kernel=&quot;linear&quot;)"/>
+            <param name="has_estimator" value="yes"/>
+            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
+            <param name="header1" value="true" />
+            <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/>
+            <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
+            <param name="header2" value="true" />
+            <param name="col2" value="1"/>
+            <param name="return_type" value="test_scores"/>
+            <output name="outfile" file="mv_result06.tabular"/>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+**What it does**
+This tool includes model validation functions to evaluate estimator performance in the cross-validation approach. This tool is based on
+sklearn.model_selection package.
+For information about classification metric functions and their parameter settings please refer to `Scikit-learn classification metrics`_.
+
+.. _`Scikit-learn classification metrics`: http://scikit-learn.org/stable/modules/model_evaluation.html#classification-metrics
+        ]]>
+    </help>
+    <expand macro="sklearn_citation"/>
+</tool>