# HG changeset patch
# User bgruening
# Date 1527683221 14400
# Node ID dd502cb0d5674d573ae8a221e12c0df23ef1ce67
# Parent 02eadaaa4bf76147b1475dc58d57933827ca0db1
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 4ed8c4f6ef9ece81797a398b17a99bbaf49a6978
diff -r 02eadaaa4bf7 -r dd502cb0d567 main_macros.xml
--- a/main_macros.xml Tue May 22 19:33:14 2018 -0400
+++ b/main_macros.xml Wed May 30 08:27:01 2018 -0400
@@ -16,6 +16,47 @@
return y
+## generate an instance for one of sklearn.feature_selection classes
+## must call "@COLUMNS_FUNCTION@"
+
+def feature_selector(inputs):
+    ## Build an instance of the sklearn.feature_selection class named by
+    ## inputs["selected_algorithm"], passing inputs["options"] as keyword arguments.
+    ##
+    ## inputs: dict of tool parameters. Always read: "selected_algorithm", "options".
+    ##         Read depending on the algorithm: "estimator", "extra_estimator"
+    ##         (with "has_estimator" plus "new_estimator" or "fitted_estimator"), "score_func".
+    ## Returns the newly constructed selector object.
+    ## Raises AttributeError when the algorithm or score function name is not an
+    ## attribute of sklearn.feature_selection, and KeyError when a required key is missing.
+    algorithm = inputs["selected_algorithm"]
+    selector = getattr(sklearn.feature_selection, algorithm)
+    options = inputs["options"]
+
+    if algorithm == 'SelectFromModel':
+        ## An empty value or the text 'None' both mean "no explicit threshold".
+        if not options['threshold'] or options['threshold'] == 'None':
+            options['threshold'] = None
+        if 'extra_estimator' in inputs and inputs['extra_estimator']['has_estimator'] == 'no_load':
+            ## Open the location stored in inputs (previously a quoted literal was opened
+            ## as the file name), in binary mode as pickle requires, and close it afterwards.
+            ## NOTE(review): assumes the stored value is a filesystem path - confirm against the caller.
+            ## NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
+            with open(inputs['extra_estimator']['fitted_estimator'], 'rb') as model_handler:
+                fitted_estimator = pickle.load(model_handler)
+            new_selector = selector(fitted_estimator, prefit=True, **options)
+        else:
+            estimator = inputs["estimator"]
+            if inputs["extra_estimator"]["has_estimator"] == 'no':
+                estimator = inputs["extra_estimator"]["new_estimator"]
+            ## __dq__ / __sq__ are turned back into quote characters before evaluation.
+            ## NOTE(review): eval runs the parameter text as Python code.
+            estimator = eval(estimator.replace('__dq__', '"').replace("__sq__", "'"))
+            new_selector = selector(estimator, **options)
+
+    elif algorithm in ['RFE', 'RFECV']:
+        ## 'scoring' is only normalized when present in options.
+        if 'scoring' in options and (not options['scoring'] or options['scoring'] == 'None'):
+            options['scoring'] = None
+        estimator = inputs["estimator"]
+        if inputs["extra_estimator"]["has_estimator"] == 'no':
+            estimator = inputs["extra_estimator"]["new_estimator"]
+        ## NOTE(review): eval runs the parameter text as Python code.
+        estimator = eval(estimator.replace('__dq__', '"').replace("__sq__", "'"))
+        new_selector = selector(estimator, **options)
+
+    elif algorithm == "VarianceThreshold":
+        new_selector = selector(**options)
+
+    else:
+        ## Every other algorithm is constructed with a score function as its first argument.
+        score_func = getattr(sklearn.feature_selection, inputs["score_func"])
+        new_selector = selector(score_func, **options)
+
+    return new_selector
+
+
python
@@ -794,6 +835,13 @@
+
+
+
+
+
+
+
@@ -975,8 +1023,8 @@
-
-
+
+
diff -r 02eadaaa4bf7 -r dd502cb0d567 model_validation.xml
--- a/model_validation.xml Tue May 22 19:33:14 2018 -0400
+++ b/model_validation.xml Wed May 30 08:27:01 2018 -0400
@@ -18,13 +18,17 @@
import sys
import json
import pandas
+import ast
import pickle
import numpy as np
import sklearn.model_selection
from sklearn import svm, linear_model, ensemble
+from sklearn.pipeline import Pipeline
@COLUMNS_FUNCTION@
+@FEATURE_SELECTOR_FUNCTION@
+
input_json_path = sys.argv[1]
params = json.load(open(input_json_path, "r"))
@@ -51,50 +55,90 @@
)
y=y.ravel()
-validator = params["model_validation_functions"]["selected_function"]
-validator = getattr(sklearn.model_selection, validator)
options = params["model_validation_functions"]["options"]
if 'scoring' in options and options['scoring'] == '':
options['scoring'] = None
+if 'pre_dispatch' in options and options['pre_dispatch'] == '':
+ options['pre_dispatch'] = None
+pipeline_steps = []
+
+## Set up feature selector and add to pipeline steps.
+if params['feature_selection']['do_feature_selection'] == 'Yes':
+ feature_selector = feature_selector(params['feature_selection']['feature_selection_algorithms'])
+ pipeline_steps.append( ('feature_selector', feature_selector))
+
+## Set up estimator and add to pipeline.
estimator=params["model_validation_functions"]["estimator"]
if params["model_validation_functions"]["extra_estimator"]["has_estimator"] == 'no':
estimator = params["model_validation_functions"]["extra_estimator"]["new_estimator"]
estimator = eval(estimator.replace('__dq__', '"').replace("__sq__","'"))
-#if $model_validation_functions.selected_function == 'cross_validate':
-res = validator(estimator, X, y, **options)
-rval = res["$model_validation_functions.return_type"]
+pipeline_steps.append( ('estimator', estimator) )
+
+pipeline = Pipeline(pipeline_steps)
+
+## Set up validator, run pipeline through validator and return results.
-#elif $model_validation_functions.selected_function == 'learning_curve':
-options['train_sizes'] = eval(options['train_sizes'])
-train_sizes_abs, train_scores, test_scores = validator(estimator, X, y, **options)
-rval = eval("$model_validation_functions.return_type")
+validator = params["model_validation_functions"]["selected_function"]
+validator = getattr(sklearn.model_selection, validator)
+
+selected_function = params["model_validation_functions"]["selected_function"]
+rval_type = params["model_validation_functions"].get("return_type", None)
-#elif $model_validation_functions.selected_function == 'permutation_test_score':
-score, permutation_scores, pvalue = validator(estimator, X, y, **options)
-rval = eval("$model_validation_functions.return_type")
-if "$model_validation_functions.return_type" in ["score", "pvalue"]:
- rval = [rval]
-
-#elif $model_validation_functions.selected_function == 'validation_curve':
-options['param_range'] = eval(options['param_range'])
-train_scores, test_scores = validator(estimator, X, y, **options)
-rval = eval("$model_validation_functions.return_type")
-
-#else:
-rval = validator(estimator, X, y, **options)
-#end if
+if selected_function == 'cross_validate':  ## dispatch on the chosen validator; every branch leaves its output in rval
+ res = validator(pipeline, X, y, **options)
+ rval = res[rval_type]  ## keep only the entry of the result selected by return_type
+elif selected_function == 'learning_curve':
+ options['train_sizes'] = eval(options['train_sizes'])  ## NOTE(review): eval runs the parameter text as Python code
+ train_sizes_abs, train_scores, test_scores = validator(pipeline, X, y, **options)
+ rval = eval(rval_type)  ## presumably return_type names one of the three variables unpacked above - confirm
+elif selected_function == 'permutation_test_score':
+ score, permutation_scores, pvalue = validator(pipeline, X, y, **options)
+ rval = eval(rval_type)  ## presumably return_type names one of the three variables unpacked above - confirm
+ if rval_type in ["score", "pvalue"]:
+ rval = [rval]  ## wrapped in a list before rval is handed to pandas.DataFrame below
+elif selected_function == 'validation_curve':
+ options['param_name'] = 'estimator__' + options['param_name']  ## the estimator is the pipeline step named 'estimator'
+ options['param_range'] = eval(options['param_range'])  ## NOTE(review): eval runs the parameter text as Python code
+ train_scores, test_scores = validator(pipeline, X, y, **options)
+ rval = eval(rval_type)  ## presumably return_type names one of the two variables unpacked above - confirm
+elif selected_function == 'GridSearchCV':
+ param_grid = params["model_validation_functions"]["param_grid"].replace("__sq__","'")\
+ .replace('__dq__','"').replace("__oc__", "{").replace("__cc__", "}")\
+ .replace("__ob__", "[").replace("__cb__", "]")
+ param_grid = ast.literal_eval(param_grid)  ## placeholder tokens were turned back into quotes, braces and brackets above
+ grid = validator(pipeline, param_grid, **options)
+ grid.fit(X, y)
+ rval = getattr(grid, rval_type)  ## return_type is read as an attribute of the fitted search object
+ if rval_type in ["best_estimator_", "best_score_", "best_index_"]:
+ rval = [rval]  ## wrapped in a list before rval is handed to pandas.DataFrame below
+else:
+ rval = validator(pipeline, X, y, **options)  ## any other validator: its return value is used as-is
rval = pandas.DataFrame(rval)
-rval.to_csv(path_or_buf="$outfile", sep='\t', header=False, index=False)
+if rval_type and rval_type == "cv_results_":
+ rval.to_csv(path_or_buf="$outfile", sep='\t', header=True, index=False)
+else:
+ rval.to_csv(path_or_buf="$outfile", sep='\t', header=False, index=False)
]]>
+
+
+
+
+
+
+
+
+
+
+
@@ -102,12 +146,28 @@
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
+
-
-
-
-
-
+
-
-
-
+
-
+
@@ -142,11 +196,7 @@
-
-
-
-
-
+
-
-
-
-
-
+
-
-
-
-
-
+
-
-
-
-
-
+
@@ -287,6 +325,23 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+