Mercurial > repos > bgruening > sklearn_model_validation
diff model_validation.xml @ 16:86e1e2874460 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
author | bgruening |
---|---|
date | Sun, 30 Dec 2018 02:02:32 -0500 |
parents | e244d6f2df1a |
children | cf9aa11b91c8 |
line wrap: on
line diff
```diff
--- a/model_validation.xml Thu Oct 11 03:38:11 2018 -0400
+++ b/model_validation.xml Sun Dec 30 02:02:32 2018 -0500
@@ -23,24 +23,26 @@
 from sklearn import preprocessing, model_selection, svm, linear_model, ensemble, naive_bayes, tree, neighbors
 from sklearn.pipeline import Pipeline
 
-exec(open("$__tool_directory__/utils.py").read(), globals())
+exec(open('$__tool_directory__/utils.py').read(), globals())
+
+warnings.filterwarnings('ignore')
 
 safe_eval = SafeEval()
 
 input_json_path = sys.argv[1]
-with open(input_json_path, "r") as param_handler:
+with open(input_json_path, 'r') as param_handler:
     params = json.load(param_handler)
 
-input_type = params["input_options"]["selected_input"]
-if input_type=="tabular":
-    header = 'infer' if params["input_options"]["header1"] else None
-    column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"]
-    if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]:
-        c = params["input_options"]["column_selector_options_1"]["col1"]
+input_type = params['input_options']['selected_input']
+if input_type == 'tabular':
+    header = 'infer' if params['input_options']['header1'] else None
+    column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option']
+    if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
+        c = params['input_options']['column_selector_options_1']['col1']
     else:
         c = None
     X = read_columns(
-            "$input_options.infile1",
+            '$input_options.infile1',
             c = c,
             c_option = column_option,
             sep='\t',
@@ -48,16 +50,16 @@
             parse_dates=True
     )
 else:
-    X = mmread("$input_options.infile1")
+    X = mmread('$input_options.infile1')
 
-header = 'infer' if params["input_options"]["header2"] else None
-column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"]
-if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]:
-    c = params["input_options"]["column_selector_options_2"]["col2"]
+header = 'infer' if params['input_options']['header2'] else None
+column_option = params['input_options']['column_selector_options_2']['selected_column_selector_option2']
+if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
+    c = params['input_options']['column_selector_options_2']['col2']
 else:
     c = None
 y = read_columns(
-        "$input_options.infile2",
+        '$input_options.infile2',
         c = c,
         c_option = column_option,
         sep='\t',
@@ -66,8 +68,14 @@
 )
 y=y.ravel()
 
-options = params["model_validation_functions"]["options"]
-options['cv'] = get_cv( options['cv'] )
+options = params['model_validation_functions']['options']
+splitter, groups = get_cv( options.pop('cv_selector') )
+if groups is None:
+    options['cv'] = splitter
+elif groups == '':
+    options['cv'] = list( splitter.split(X, y, groups=None) )
+else:
+    options['cv'] = list( splitter.split(X, y, groups=groups) )
 options['n_jobs'] = N_JOBS
 if 'scoring' in options:
     options['scoring'] = get_scoring(options['scoring'])
@@ -78,8 +86,8 @@
 
 ## Set up pre_processor and add to pipeline steps.
 if params['pre_processing']['do_pre_processing'] == 'Yes':
-    preprocessor = params["pre_processing"]["pre_processors"]["selected_pre_processor"]
-    pre_processor_options = params["pre_processing"]["pre_processors"]["options"]
+    preprocessor = params['pre_processing']['pre_processors']['selected_pre_processor']
+    pre_processor_options = params['pre_processing']['pre_processors']['options']
     my_class = getattr(preprocessing, preprocessor)
     pipeline_steps.append( ('pre_processor', my_class(**pre_processor_options)) )
 
@@ -89,7 +97,7 @@
     pipeline_steps.append( ('feature_selector', feature_selector) )
 
 ## Set up estimator and add to pipeline.
-estimator_json = params["model_validation_functions"]['estimator_selector']
+estimator_json = params['model_validation_functions']['estimator_selector']
 estimator = get_estimator(estimator_json)
 pipeline_steps.append( ('estimator', estimator) )
 
@@ -98,11 +106,11 @@
 
 ## Set up validator, run pipeline through validator and return results.
 
-validator = params["model_validation_functions"]["selected_function"]
+validator = params['model_validation_functions']['selected_function']
 validator = getattr(model_selection, validator)
 
-selected_function = params["model_validation_functions"]["selected_function"]
-rval_type = params["model_validation_functions"].get("return_type", None)
+selected_function = params['model_validation_functions']['selected_function']
+rval_type = params['model_validation_functions'].get('return_type', None)
 
 if selected_function == 'cross_validate':
     res = validator(pipeline, X, y, **options)
@@ -114,7 +122,7 @@
 elif selected_function == 'permutation_test_score':
     score, permutation_scores, pvalue = validator(pipeline, X, y, **options)
     rval = eval(rval_type)
-    if rval_type in ["score", "pvalue"]:
+    if rval_type in ['score', 'pvalue']:
         rval = [rval]
 elif selected_function == 'validation_curve':
     options['param_name'] = 'estimator__' + options['param_name']
@@ -125,7 +133,7 @@
     rval = validator(pipeline, X, y, **options)
 
 rval = pandas.DataFrame(rval)
-rval.to_csv(path_or_buf="$outfile", sep='\t', header=False, index=False)
+rval.to_csv(path_or_buf='$outfile', sep='\t', header=False, index=False)
 
         ]]>
         </configfile>
@@ -151,9 +159,7 @@
                 </param>
                 <when value="No"/>
                 <when value="Yes">
-                    <expand macro="feature_selection_all">
-                        <expand macro="fs_selectfrommodel_no_prefitted"/>
-                    </expand>
+                    <expand macro="feature_selection_pipeline"/>
                 </when>
             </conditional>
             <conditional name="model_validation_functions">
```