Mercurial > repos > bgruening > sklearn_model_validation
diff model_validation.xml @ 0:333507faecab draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 2e1e78576b38110cf5b1f2ed83b08b9c3a6cbfee
author | bgruening |
---|---|
date | Sat, 28 Apr 2018 18:10:26 -0400 |
parents | |
children | dd502cb0d567 |
line wrap: on
line diff
<!--
    Galaxy tool wrapper: Model Validation (sklearn.model_selection).

    The tool renders the Python script in <configfiles> through Cheetah and runs
    it with the JSON dump of the tool parameters as its only argument. The script
    loads X (tabular columns or a sparse matrix) and y (tabular column), builds
    the estimator from the expression chosen in the form, calls the selected
    sklearn.model_selection function and writes the chosen return value to the
    tabular output dataset.

    Macros (python_requirements, macro_stdio, feature_selection_*, scoring, ...)
    and the @VERSION@ / @COLUMNS_FUNCTION@ tokens are defined in main_macros.xml.
-->
<tool id="sklearn_model_validation" name="Model Validation" version="@VERSION@">
    <description>evaluates estimator performance by cross-validation</description>
    <macros>
        <import>main_macros.xml</import>
    </macros>
    <expand macro="python_requirements"/>
    <expand macro="macro_stdio"/>
    <version_command>echo "@VERSION@"</version_command>
    <command>
        <![CDATA[
        python "$sklearn_model_validation_script" '$inputs'
        ]]>
    </command>
    <configfiles>
        <inputs name="inputs" />
        <configfile name="sklearn_model_validation_script">
            <![CDATA[
import sys
import json
import pandas
import pickle
import numpy as np
import sklearn.model_selection
from scipy.io import mmread
from sklearn import svm, linear_model, ensemble

@COLUMNS_FUNCTION@

## The tool parameters are passed as a JSON file (first command line argument).
input_json_path = sys.argv[1]
with open(input_json_path, "r") as param_handler:
    params = json.load(param_handler)

## Load the feature matrix X: selected columns of a tabular file, or a sparse matrix file.
input_type = params["input_options"]["selected_input"]
if input_type == "tabular":
    header = 'infer' if params["input_options"]["header1"] else None
    X = read_columns(
            "$input_options.infile1",
            "$input_options.col1",
            sep='\t',
            header=header,
            parse_dates=True
    )
else:
    ## mmread accepts a path and takes care of opening and closing the file itself.
    X = mmread("$input_options.infile1")

## Load the target column y and flatten it to one dimension.
header = 'infer' if params["input_options"]["header2"] else None
y = read_columns(
        "$input_options.infile2",
        "$input_options.col2",
        sep='\t',
        header=header,
        parse_dates=True
)
y = y.ravel()

## Resolve the selected validation function by name from sklearn.model_selection.
validator = params["model_validation_functions"]["selected_function"]
validator = getattr(sklearn.model_selection, validator)
options = params["model_validation_functions"]["options"]
## An empty scoring selection means "use the default scorer of the estimator".
if 'scoring' in options and options['scoring'] == '':
    options['scoring'] = None

estimator = params["model_validation_functions"]["estimator"]
if params["model_validation_functions"]["extra_estimator"]["has_estimator"] == 'no':
    estimator = params["model_validation_functions"]["extra_estimator"]["new_estimator"]
## NOTE(security): the estimator expression comes straight from the tool form and is
## evaluated as Python code. The __dq__ / __sq__ placeholders are turned back into quotes first.
estimator = eval(estimator.replace('__dq__', '"').replace("__sq__","'"))

#if $model_validation_functions.selected_function == 'cross_validate':
res = validator(estimator, X, y, **options)
rval = res["$model_validation_functions.return_type"]

#elif $model_validation_functions.selected_function == 'learning_curve':
## NOTE(security): train_sizes is free text from the form (e.g. a numpy expression) and is evaluated.
options['train_sizes'] = eval(options['train_sizes'])
train_sizes_abs, train_scores, test_scores = validator(estimator, X, y, **options)
rval = {
    "train_sizes_abs": train_sizes_abs,
    "train_scores": train_scores,
    "test_scores": test_scores,
}["$model_validation_functions.return_type"]

#elif $model_validation_functions.selected_function == 'permutation_test_score':
score, permutation_scores, pvalue = validator(estimator, X, y, **options)
rval = {
    "score": score,
    "permutation_scores": permutation_scores,
    "pvalue": pvalue,
}["$model_validation_functions.return_type"]
## score and pvalue are scalars; wrap them so that a DataFrame can be built below.
if "$model_validation_functions.return_type" in ["score", "pvalue"]:
    rval = [rval]

#elif $model_validation_functions.selected_function == 'validation_curve':
## NOTE(security): param_range is free text from the form (e.g. a numpy expression) and is evaluated.
options['param_range'] = eval(options['param_range'])
train_scores, test_scores = validator(estimator, X, y, **options)
rval = {
    "train_scores": train_scores,
    "test_scores": test_scores,
}["$model_validation_functions.return_type"]

#else:
rval = validator(estimator, X, y, **options)
#end if

## Write the result as a tab separated table without header or index.
rval = pandas.DataFrame(rval)
rval.to_csv(path_or_buf="$outfile", sep='\t', header=False, index=False)

            ]]>
        </configfile>
    </configfiles>
    <inputs>
        <conditional name="model_validation_functions">
            <param name="selected_function" type="select" label="Select a model validation function">
                <option value="cross_validate">cross_validate - Evaluate metric(s) by cross-validation and also record fit/score times</option>
                <option value="cross_val_predict">cross_val_predict - Generate cross-validated estimates for each input data point</option>
                <option value="cross_val_score">cross_val_score - Evaluate a score by cross-validation</option>
                <option value="learning_curve">learning_curve - Learning curve</option>
                <option value="permutation_test_score">permutation_test_score - Evaluate the significance of a cross-validated score with permutations</option>
                <option value="validation_curve">validation_curve - Validation curve</option>
            </param>
            <when value="cross_validate">
                <expand macro="feature_selection_estimator" />
                <conditional name="extra_estimator">
                    <expand macro="feature_selection_extra_estimator" />
                    <expand macro="feature_selection_estimator_choices" />
                </conditional>
                <section name="options" title="Other Options" expanded="false">
                    <!--groups-->
                    <expand macro="model_validation_common_options"/>
                    <expand macro="scoring"/>
                    <!--fit_params-->
                    <expand macro="pre_dispatch"/>
                </section>
                <param name="return_type" type="select" label="Select a return type">
                    <option value="test_score" selected="true">test_score</option>
                    <option value="train_score">train_score</option>
                    <option value="fit_time">fit_time</option>
                    <option value="score_time">score_time</option>
                </param>
            </when>
            <when value="cross_val_predict">
                <expand macro="feature_selection_estimator" />
                <conditional name="extra_estimator">
                    <expand macro="feature_selection_extra_estimator" />
                    <expand macro="feature_selection_estimator_choices" />
                </conditional>
                <section name="options" title="Other Options" expanded="false">
                    <!--groups-->
                    <param argument="cv" type="integer" value="" optional="true" label="cv" help="The number of folds in a (Stratified)KFold" />
                    <expand macro="n_jobs"/>
                    <expand macro="verbose"/>
                    <!--fit_params-->
                    <param argument="pre_dispatch" type="integer" value="" optional="true" label="pre_dispatch" help="Controls the number of jobs that get dispatched during parallel execution" />
                    <param argument="method" type="select" label="Invokes the passed method name of the passed estimator">
                        <option value="predict" selected="true">predict</option>
                        <option value="predict_proba">predict_proba</option>
                    </param>
                </section>
            </when>
            <when value="cross_val_score">
                <expand macro="feature_selection_estimator" />
                <conditional name="extra_estimator">
                    <expand macro="feature_selection_extra_estimator" />
                    <expand macro="feature_selection_estimator_choices" />
                </conditional>
                <section name="options" title="Other Options" expanded="false">
                    <!--groups-->
                    <expand macro="model_validation_common_options"/>
                    <expand macro="scoring"/>
                    <!--fit_params-->
                    <expand macro="pre_dispatch"/>
                </section>
            </when>
            <when value="learning_curve">
                <expand macro="feature_selection_estimator" />
                <conditional name="extra_estimator">
                    <expand macro="feature_selection_extra_estimator" />
                    <expand macro="feature_selection_estimator_choices" />
                </conditional>
                <section name="options" title="Other Options" expanded="false">
                    <!--groups-->
                    <expand macro="model_validation_common_options"/>
                    <param argument="train_sizes" type="text" value="np.linspace(0.1, 1.0, 5)" label="train_sizes" help="Relative or absolute numbers of training examples that will be used to generate the learning curve"/>
                    <expand macro="scoring"/>
                    <param argument="exploit_incremental_learning" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="exploit_incremental_learning" help="Whether to apply incremental learning to speed up fitting of the estimator if supported"/>
                    <expand macro="pre_dispatch"/>
                    <expand macro="shuffle" checked="false" label="shuffle" help="Whether to shuffle training data before taking prefixes"/>
                    <expand macro="random_state"/>
                </section>
                <param name="return_type" type="select" label="Select a return type">
                    <option value="train_sizes_abs" selected="true">train_sizes_abs</option>
                    <option value="train_scores">train_scores</option>
                    <option value="test_scores">test_scores</option>
                </param>
            </when>
            <when value="permutation_test_score">
                <expand macro="feature_selection_estimator" />
                <conditional name="extra_estimator">
                    <expand macro="feature_selection_extra_estimator" />
                    <expand macro="feature_selection_estimator_choices" />
                </conditional>
                <section name="options" title="Other Options" expanded="false">
                    <!--groups-->
                    <expand macro="model_validation_common_options"/>
                    <expand macro="scoring"/>
                    <param name="n_permutations" type="integer" value="100" optional="true" label="n_permutations" help="Number of times to permute y"/>
                    <expand macro="random_state"/>
                </section>
                <param name="return_type" type="select" label="Select a return type">
                    <option value="score" selected="true">score</option>
                    <option value="permutation_scores">permutation_scores</option>
                    <option value="pvalue">pvalue</option>
                </param>
            </when>
            <when value="validation_curve">
                <expand macro="feature_selection_estimator" />
                <conditional name="extra_estimator">
                    <expand macro="feature_selection_extra_estimator" />
                    <expand macro="feature_selection_estimator_choices" />
                </conditional>
                <section name="options" title="Other Options" expanded="false">
                    <param name="param_name" type="text" value="gamma" label="param_name" help="Name of the parameter that will be varied"/>
                    <param name="param_range" type="text" value="np.logspace(-6, -1, 5)" label="param_range" help="The values of the parameter that will be evaluated."/>
                    <!--groups-->
                    <expand macro="model_validation_common_options"/>
                    <expand macro="scoring"/>
                    <expand macro="pre_dispatch"/>
                </section>
                <param name="return_type" type="select" label="Select a return type">
                    <option value="train_scores" selected="true">train_scores</option>
                    <option value="test_scores">test_scores</option>
                </param>
            </when>
        </conditional>
        <expand macro="sl_mixed_input"/>
    </inputs>
    <outputs>
        <data format="tabular" name="outfile"/>
    </outputs>
    <tests>
        <test>
            <param name="selected_function" value="cross_validate"/>
            <param name="estimator" value="linear_model.LassoCV()"/>
            <param name="has_estimator" value="yes"/>
            <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
            <param name="col1" value="1,2,3,4,5"/>
            <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
            <param name="col2" value="6"/>
            <output name="outfile" file="mv_result01.tabular"/>
        </test>
        <test>
            <param name="selected_function" value="cross_val_predict"/>
            <param name="estimator" value="linear_model.LassoCV()"/>
            <param name="has_estimator" value="yes"/>
            <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
            <param name="col1" value="1,2,3,4,5"/>
            <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
            <param name="col2" value="6"/>
            <output name="outfile" file="mv_result02.tabular"/>
        </test>
        <test>
            <param name="selected_function" value="cross_val_score"/>
            <param name="estimator" value="linear_model.LassoCV()"/>
            <param name="has_estimator" value="yes"/>
            <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
            <param name="col1" value="1,2,3,4,5"/>
            <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
            <param name="col2" value="6"/>
            <output name="outfile" file="mv_result03.tabular"/>
        </test>
        <test>
            <param name="selected_function" value="learning_curve"/>
            <param name="estimator" value="linear_model.LassoCV()"/>
            <param name="has_estimator" value="yes"/>
            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
            <param name="header1" value="true" />
            <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/>
            <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
            <param name="header2" value="true" />
            <param name="col2" value="1"/>
            <output name="outfile" file="mv_result04.tabular"/>
        </test>
        <test>
            <param name="selected_function" value="permutation_test_score"/>
            <param name="estimator" value="linear_model.LassoCV()"/>
            <param name="has_estimator" value="yes"/>
            <param name="infile1" value="regression_train.tabular" ftype="tabular"/>
            <param name="col1" value="1,2,3,4,5"/>
            <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
            <param name="col2" value="6"/>
            <output name="outfile" file="mv_result05.tabular"/>
        </test>
        <test>
            <param name="selected_function" value="validation_curve"/>
            <!-- Inner quotes must be escaped inside the attribute value. -->
            <param name="estimator" value="svm.SVC(kernel=&quot;linear&quot;)"/>
            <param name="has_estimator" value="yes"/>
            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
            <param name="header1" value="true" />
            <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/>
            <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
            <param name="header2" value="true" />
            <param name="col2" value="1"/>
            <param name="return_type" value="test_scores"/>
            <output name="outfile" file="mv_result06.tabular"/>
        </test>
    </tests>
    <help>
        <![CDATA[
**What it does**
This tool includes model validation functions to evaluate estimator performance in the cross-validation approach. This tool is based on
sklearn.model_selection package.
For information about classification metric functions and their parameter settings please refer to `Scikit-learn classification metrics`_.

.. _`Scikit-learn classification metrics`: http://scikit-learn.org/stable/modules/model_evaluation.html#classification-metrics
        ]]>
    </help>
    <expand macro="sklearn_citation"/>
</tool>