# HG changeset patch
# User bgruening
# Date 1527683221 14400
# Node ID dd502cb0d5674d573ae8a221e12c0df23ef1ce67
# Parent 02eadaaa4bf76147b1475dc58d57933827ca0db1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 4ed8c4f6ef9ece81797a398b17a99bbaf49a6978

diff -r 02eadaaa4bf7 -r dd502cb0d567 main_macros.xml
--- a/main_macros.xml Tue May 22 19:33:14 2018 -0400
+++ b/main_macros.xml Wed May 30 08:27:01 2018 -0400
@@ -16,6 +16,47 @@
   return y
+## feature_selector(inputs): instantiate the sklearn.feature_selection class named by inputs["selected_algorithm"], passing inputs["options"] as keyword arguments
+## must call "@COLUMNS_FUNCTION@"
+
+def feature_selector(inputs):
+  selector = inputs["selected_algorithm"]
+  selector = getattr(sklearn.feature_selection, selector)
+  options = inputs["options"]
+
+  if inputs['selected_algorithm'] == 'SelectFromModel':
+    if not options['threshold'] or options['threshold'] == 'None':
+      options['threshold'] = None
+    if 'extra_estimator' in inputs and inputs['extra_estimator']['has_estimator'] == 'no_load':
+      fitted_estimator = pickle.load(open(inputs['extra_estimator']['fitted_estimator'], 'rb'))  ## path comes from inputs; 'rb' because pickle data is binary. NOTE: unpickling runs arbitrary code, trusted models only
+      new_selector = selector(fitted_estimator, prefit=True, **options)
+    else:
+      estimator=inputs["estimator"]
+      if inputs["extra_estimator"]["has_estimator"]=='no':
+        estimator=inputs["extra_estimator"]["new_estimator"]
+      estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'"))  ## NOTE: eval of a string taken from inputs; trusted input only
+      new_selector = selector(estimator, **options)
+
+  elif inputs['selected_algorithm'] in ['RFE', 'RFECV']:
+    if 'scoring' in options and (not options['scoring'] or options['scoring'] == 'None'):
+      options['scoring'] = None
+    estimator=inputs["estimator"]
+    if inputs["extra_estimator"]["has_estimator"]=='no':
+      estimator=inputs["extra_estimator"]["new_estimator"]
+    estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'"))  ## NOTE: eval of a string taken from inputs; trusted input only
+    new_selector = selector(estimator, **options)
+
+  elif inputs['selected_algorithm'] == "VarianceThreshold":
+    new_selector = selector(**options)
+
+  else:
+    score_func = inputs["score_func"]
+    score_func = getattr(sklearn.feature_selection, score_func)
+    new_selector = selector(score_func, **options)
+
+  return new_selector
+
+
 python
@@ -794,6 +835,13 @@
+
+
+
+
+
+
+
@@ -975,8 +1023,8 @@
-
-
+
+
diff -r 02eadaaa4bf7 -r dd502cb0d567 model_validation.xml
--- a/model_validation.xml Tue May 22 19:33:14 2018 -0400
+++ b/model_validation.xml Wed May 30 08:27:01 2018 -0400
@@ -18,13 +18,17 @@
 import sys
 import json
 import pandas
+import ast
 import pickle
 import numpy as np
 import sklearn.model_selection
 from sklearn import svm, linear_model, ensemble
+from sklearn.pipeline import Pipeline

 @COLUMNS_FUNCTION@

+@FEATURE_SELECTOR_FUNCTION@
+
 input_json_path = sys.argv[1]
 params = json.load(open(input_json_path, "r"))

@@ -51,50 +55,90 @@
 )
 y=y.ravel()
-validator = params["model_validation_functions"]["selected_function"]
-validator = getattr(sklearn.model_selection, validator)
 options = params["model_validation_functions"]["options"]
 if 'scoring' in options and options['scoring'] == '':
     options['scoring'] = None
+if 'pre_dispatch' in options and options['pre_dispatch'] == '':
+    options['pre_dispatch'] = None
+pipeline_steps = []
+
+## Set up feature selector and add to pipeline steps.
+if params['feature_selection']['do_feature_selection'] == 'Yes':
+    selector_step = feature_selector(params['feature_selection']['feature_selection_algorithms'])  ## separate name so the feature_selector function is not rebound
+    pipeline_steps.append( ('feature_selector', selector_step))
+
+## Set up estimator and add to pipeline.
 estimator=params["model_validation_functions"]["estimator"]
 if params["model_validation_functions"]["extra_estimator"]["has_estimator"] == 'no':
     estimator = params["model_validation_functions"]["extra_estimator"]["new_estimator"]
 estimator = eval(estimator.replace('__dq__', '"').replace("__sq__","'"))
-#if $model_validation_functions.selected_function == 'cross_validate':
-res = validator(estimator, X, y, **options)
-rval = res["$model_validation_functions.return_type"]
+pipeline_steps.append( ('estimator', estimator) )
+
+pipeline = Pipeline(pipeline_steps)
+
+## Set up validator, run pipeline through validator and return results.

-#elif $model_validation_functions.selected_function == 'learning_curve':
-options['train_sizes'] = eval(options['train_sizes'])
-train_sizes_abs, train_scores, test_scores = validator(estimator, X, y, **options)
-rval = eval("$model_validation_functions.return_type")
+validator = params["model_validation_functions"]["selected_function"]
+validator = getattr(sklearn.model_selection, validator)
+
+selected_function = params["model_validation_functions"]["selected_function"]
+rval_type = params["model_validation_functions"].get("return_type", None)

-#elif $model_validation_functions.selected_function == 'permutation_test_score':
-score, permutation_scores, pvalue = validator(estimator, X, y, **options)
-rval = eval("$model_validation_functions.return_type")
-if "$model_validation_functions.return_type" in ["score", "pvalue"]:
-    rval = [rval]
-
-#elif $model_validation_functions.selected_function == 'validation_curve':
-options['param_range'] = eval(options['param_range'])
-train_scores, test_scores = validator(estimator, X, y, **options)
-rval = eval("$model_validation_functions.return_type")
-
-#else:
-rval = validator(estimator, X, y, **options)
-#end if
+if selected_function == 'cross_validate':
+    res = validator(pipeline, X, y, **options)
+    rval = res[rval_type]
+elif selected_function == 'learning_curve':
+    options['train_sizes'] = eval(options['train_sizes'])  ## NOTE: eval of a value read from the params JSON; trusted input only
+    train_sizes_abs, train_scores, test_scores = validator(pipeline, X, y, **options)
+    rval = eval(rval_type)  ## evaluates the return_type string as an expression, here naming one of the results unpacked above
+elif selected_function == 'permutation_test_score':
+    score, permutation_scores, pvalue = validator(pipeline, X, y, **options)
+    rval = eval(rval_type)
+    if rval_type in ["score", "pvalue"]:
+        rval = [rval]
+elif selected_function == 'validation_curve':
+    options['param_name'] = 'estimator__' + options['param_name']
+    options['param_range'] = eval(options['param_range'])  ## NOTE: eval of a value read from the params JSON; trusted input only
+    train_scores, test_scores = validator(pipeline, X, y, **options)
+    rval = eval(rval_type)
+elif selected_function == 'GridSearchCV':
+    param_grid = params["model_validation_functions"]["param_grid"].replace("__sq__","'")\
+        .replace('__dq__','"').replace("__oc__", "{").replace("__cc__", "}")\
+        .replace("__ob__", "[").replace("__cb__", "]")
+    param_grid = ast.literal_eval(param_grid)
+    grid = validator(pipeline, param_grid, **options)
+    grid.fit(X, y)
+    rval = getattr(grid, rval_type)
+    if rval_type in ["best_estimator_", "best_score_", "best_index_"]:
+        rval = [rval]
+else:
+    rval = validator(pipeline, X, y, **options)
 rval = pandas.DataFrame(rval)
-rval.to_csv(path_or_buf="$outfile", sep='\t', header=False, index=False)
+if rval_type and rval_type == "cv_results_":
+    rval.to_csv(path_or_buf="$outfile", sep='\t', header=True, index=False)
+else:
+    rval.to_csv(path_or_buf="$outfile", sep='\t', header=False, index=False)
 ]]>
+
+
+
+
@@ -102,12 +146,28 @@
+
+
+
+

+ + + + + + + +

+ + + + + + + + - - - - - +

@@ -123,18 +183,12 @@ - - - - - +

- - - + - + @@ -142,11 +196,7 @@

- - - - - +

@@ -156,11 +206,7 @@

- - - - - +

@@ -178,11 +224,7 @@ - - - - - +

@@ -197,11 +239,7 @@ - - - - - +

@@ -287,6 +325,23 @@ + + + + + + + + + + + + + + + + +