Mercurial > repos > bgruening > sklearn_model_validation
diff model_validation.xml @ 8:fd7a054ffdbd draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit f54ff2ba2f8e7542d68966ce5a6b17d7f624ac48
author | bgruening |
---|---|
date | Fri, 13 Jul 2018 03:56:45 -0400 |
parents | 510aa2ce035e |
children | c6b3efcba7bd |
line wrap: on
line diff
--- a/model_validation.xml Tue Jul 10 03:13:16 2018 -0400 +++ b/model_validation.xml Fri Jul 13 03:56:45 2018 -0400 @@ -22,7 +22,7 @@ import pickle import numpy as np import sklearn.model_selection -from sklearn import svm, linear_model, ensemble +from sklearn import svm, linear_model, ensemble, preprocessing from sklearn.pipeline import Pipeline @COLUMNS_FUNCTION@ @@ -30,7 +30,8 @@ @FEATURE_SELECTOR_FUNCTION@ input_json_path = sys.argv[1] -params = json.load(open(input_json_path, "r")) +with open(input_json_path, "r") as param_handler: + params = json.load(param_handler) input_type = params["input_options"]["selected_input"] if input_type=="tabular": @@ -49,7 +50,7 @@ parse_dates=True ) else: - X = mmread(open("$input_options.infile1", 'r')) + X = mmread("$input_options.infile1") header = 'infer' if params["input_options"]["header2"] else None column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] @@ -75,10 +76,17 @@ pipeline_steps = [] +## Set up pre_processor and add to pipeline steps. +if params['pre_processing']['do_pre_processing'] == 'Yes': + preprocessor = params["pre_processing"]["pre_processors"]["selected_pre_processor"] + pre_processor_options = params["pre_processing"]["pre_processors"]["options"] + my_class = getattr(preprocessing, preprocessor) + pipeline_steps.append( ('pre_processor', my_class(**pre_processor_options)) ) + ## Set up feature selector and add to pipeline steps. if params['feature_selection']['do_feature_selection'] == 'Yes': feature_selector = feature_selector(params['feature_selection']['feature_selection_algorithms']) - pipeline_steps.append( ('feature_selector', feature_selector)) + pipeline_steps.append( ('feature_selector', feature_selector) ) ## Set up estimator and add to pipeline. 
estimator=params["model_validation_functions"]["estimator"] @@ -138,6 +146,19 @@ </configfile> </configfiles> <inputs> + <conditional name="pre_processing"> + <param name="do_pre_processing" type="select" label="Do pre_processing?"> + <option value="No" selected="true"/> + <option value="Yes"/> + </param> + <when value="No"/> + <when value="Yes"> + <conditional name="pre_processors"> + <expand macro="sparse_preprocessors_ext" /> + <expand macro="sparse_preprocessor_options_ext" /> + </conditional> + </when> + </conditional> <conditional name="feature_selection"> <param name="do_feature_selection" type="select" label="Do feature selection?"> <option value="No" selected="true"/> @@ -352,7 +373,54 @@ <param name="infile2" value="regression_y.tabular" ftype="tabular"/> <param name="header2" value="true" /> <param name="selected_column_selector_option2" value="all_columns"/> - <output name="outfile" file="mv_result07.tabular"/> + <output name="outfile" > + <assert_contents> + <has_line line="0.7824428015300172" /> + </assert_contents> + </output> + </test> + <test> + <param name="do_pre_processing" value="Yes"/> + <param name="selected_pre_processor" value="RobustScaler"/> + <param name="do_feature_selection" value="Yes"/> + <param name="selected_algorithm" value="SelectKBest"/> + <param name="score_func" value="f_classif"/> + <param name="selected_function" value="GridSearchCV"/> + <param name="estimator" value="svm.SVR(kernel=&quot;linear&quot;)"/> + <param name="has_estimator" value="yes"/> + <param name="param_grid" value="[{'feature_selector__k': [3, 5, 7, 9], 'estimator__C': [1, 10, 100, 1000]}]"/> + <param name="return_type" value="best_score_"/> + <param name="infile1" value="regression_X.tabular" ftype="tabular"/> + <param name="header1" value="true" /> + <param name="selected_column_selector_option" value="all_columns"/> + <param name="infile2" value="regression_y.tabular" ftype="tabular"/> + <param name="header2" value="true" /> + <param 
name="selected_column_selector_option2" value="all_columns"/> + <output name="outfile" > + <assert_contents> + <has_line line="0.7938837807353147" /> + </assert_contents> + </output> + </test> + <test> + <param name="do_pre_processing" value="Yes"/> + <param name="selected_pre_processor" value="RobustScaler"/> + <param name="selected_function" value="GridSearchCV"/> + <param name="estimator" value="svm.SVR(kernel=&quot;linear&quot;)"/> + <param name="has_estimator" value="yes"/> + <param name="param_grid" value="[{'estimator__C': [1, 10, 100, 1000]}]"/> + <param name="return_type" value="best_score_"/> + <param name="infile1" value="regression_X.tabular" ftype="tabular"/> + <param name="header1" value="true" /> + <param name="selected_column_selector_option" value="all_columns"/> + <param name="infile2" value="regression_y.tabular" ftype="tabular"/> + <param name="header2" value="true" /> + <param name="selected_column_selector_option2" value="all_columns"/> + <output name="outfile" > + <assert_contents> + <has_line line="0.7904476204861263" /> + </assert_contents> + </output> </test> </tests> <help>