Mercurial > repos > bgruening > sklearn_model_validation
comparison model_validation.xml @ 16:86e1e2874460 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 57f4407e278a615f47a377a3328782b1d8e0b54d
author | bgruening |
---|---|
date | Sun, 30 Dec 2018 02:02:32 -0500 |
parents | e244d6f2df1a |
children | cf9aa11b91c8 |
comparison legend: equal | deleted | inserted | replaced
15:33d2606fdb3f | 16:86e1e2874460 |
---|---|
21 import pandas | 21 import pandas |
22 import numpy as np | 22 import numpy as np |
23 from sklearn import preprocessing, model_selection, svm, linear_model, ensemble, naive_bayes, tree, neighbors | 23 from sklearn import preprocessing, model_selection, svm, linear_model, ensemble, naive_bayes, tree, neighbors |
24 from sklearn.pipeline import Pipeline | 24 from sklearn.pipeline import Pipeline |
25 | 25 |
26 exec(open("$__tool_directory__/utils.py").read(), globals()) | 26 exec(open('$__tool_directory__/utils.py').read(), globals()) |
27 | |
28 warnings.filterwarnings('ignore') | |
27 | 29 |
28 safe_eval = SafeEval() | 30 safe_eval = SafeEval() |
29 | 31 |
30 input_json_path = sys.argv[1] | 32 input_json_path = sys.argv[1] |
31 with open(input_json_path, "r") as param_handler: | 33 with open(input_json_path, 'r') as param_handler: |
32 params = json.load(param_handler) | 34 params = json.load(param_handler) |
33 | 35 |
34 input_type = params["input_options"]["selected_input"] | 36 input_type = params['input_options']['selected_input'] |
35 if input_type=="tabular": | 37 if input_type == 'tabular': |
36 header = 'infer' if params["input_options"]["header1"] else None | 38 header = 'infer' if params['input_options']['header1'] else None |
37 column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"] | 39 column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option'] |
38 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: | 40 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: |
39 c = params["input_options"]["column_selector_options_1"]["col1"] | 41 c = params['input_options']['column_selector_options_1']['col1'] |
40 else: | 42 else: |
41 c = None | 43 c = None |
42 X = read_columns( | 44 X = read_columns( |
43 "$input_options.infile1", | 45 '$input_options.infile1', |
44 c = c, | 46 c = c, |
45 c_option = column_option, | 47 c_option = column_option, |
46 sep='\t', | 48 sep='\t', |
47 header=header, | 49 header=header, |
48 parse_dates=True | 50 parse_dates=True |
49 ) | 51 ) |
50 else: | 52 else: |
51 X = mmread("$input_options.infile1") | 53 X = mmread('$input_options.infile1') |
52 | 54 |
53 header = 'infer' if params["input_options"]["header2"] else None | 55 header = 'infer' if params['input_options']['header2'] else None |
54 column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] | 56 column_option = params['input_options']['column_selector_options_2']['selected_column_selector_option2'] |
55 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: | 57 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: |
56 c = params["input_options"]["column_selector_options_2"]["col2"] | 58 c = params['input_options']['column_selector_options_2']['col2'] |
57 else: | 59 else: |
58 c = None | 60 c = None |
59 y = read_columns( | 61 y = read_columns( |
60 "$input_options.infile2", | 62 '$input_options.infile2', |
61 c = c, | 63 c = c, |
62 c_option = column_option, | 64 c_option = column_option, |
63 sep='\t', | 65 sep='\t', |
64 header=header, | 66 header=header, |
65 parse_dates=True | 67 parse_dates=True |
66 ) | 68 ) |
67 y=y.ravel() | 69 y=y.ravel() |
68 | 70 |
69 options = params["model_validation_functions"]["options"] | 71 options = params['model_validation_functions']['options'] |
70 options['cv'] = get_cv( options['cv'] ) | 72 splitter, groups = get_cv( options.pop('cv_selector') ) |
73 if groups is None: | |
74 options['cv'] = splitter | |
75 elif groups == '': | |
76 options['cv'] = list( splitter.split(X, y, groups=None) ) | |
77 else: | |
78 options['cv'] = list( splitter.split(X, y, groups=groups) ) | |
71 options['n_jobs'] = N_JOBS | 79 options['n_jobs'] = N_JOBS |
72 if 'scoring' in options: | 80 if 'scoring' in options: |
73 options['scoring'] = get_scoring(options['scoring']) | 81 options['scoring'] = get_scoring(options['scoring']) |
74 if 'pre_dispatch' in options and options['pre_dispatch'] == '': | 82 if 'pre_dispatch' in options and options['pre_dispatch'] == '': |
75 options['pre_dispatch'] = None | 83 options['pre_dispatch'] = None |
76 | 84 |
77 pipeline_steps = [] | 85 pipeline_steps = [] |
78 | 86 |
79 ## Set up pre_processor and add to pipeline steps. | 87 ## Set up pre_processor and add to pipeline steps. |
80 if params['pre_processing']['do_pre_processing'] == 'Yes': | 88 if params['pre_processing']['do_pre_processing'] == 'Yes': |
81 preprocessor = params["pre_processing"]["pre_processors"]["selected_pre_processor"] | 89 preprocessor = params['pre_processing']['pre_processors']['selected_pre_processor'] |
82 pre_processor_options = params["pre_processing"]["pre_processors"]["options"] | 90 pre_processor_options = params['pre_processing']['pre_processors']['options'] |
83 my_class = getattr(preprocessing, preprocessor) | 91 my_class = getattr(preprocessing, preprocessor) |
84 pipeline_steps.append( ('pre_processor', my_class(**pre_processor_options)) ) | 92 pipeline_steps.append( ('pre_processor', my_class(**pre_processor_options)) ) |
85 | 93 |
86 ## Set up feature selector and add to pipeline steps. | 94 ## Set up feature selector and add to pipeline steps. |
87 if params['feature_selection']['do_feature_selection'] == 'Yes': | 95 if params['feature_selection']['do_feature_selection'] == 'Yes': |
88 feature_selector = feature_selector(params['feature_selection']['fs_algorithm_selector']) | 96 feature_selector = feature_selector(params['feature_selection']['fs_algorithm_selector']) |
89 pipeline_steps.append( ('feature_selector', feature_selector) ) | 97 pipeline_steps.append( ('feature_selector', feature_selector) ) |
90 | 98 |
91 ## Set up estimator and add to pipeline. | 99 ## Set up estimator and add to pipeline. |
92 estimator_json = params["model_validation_functions"]['estimator_selector'] | 100 estimator_json = params['model_validation_functions']['estimator_selector'] |
93 estimator = get_estimator(estimator_json) | 101 estimator = get_estimator(estimator_json) |
94 | 102 |
95 pipeline_steps.append( ('estimator', estimator) ) | 103 pipeline_steps.append( ('estimator', estimator) ) |
96 | 104 |
97 pipeline = Pipeline(pipeline_steps) | 105 pipeline = Pipeline(pipeline_steps) |
98 | 106 |
99 ## Set up validator, run pipeline through validator and return results. | 107 ## Set up validator, run pipeline through validator and return results. |
100 | 108 |
101 validator = params["model_validation_functions"]["selected_function"] | 109 validator = params['model_validation_functions']['selected_function'] |
102 validator = getattr(model_selection, validator) | 110 validator = getattr(model_selection, validator) |
103 | 111 |
104 selected_function = params["model_validation_functions"]["selected_function"] | 112 selected_function = params['model_validation_functions']['selected_function'] |
105 rval_type = params["model_validation_functions"].get("return_type", None) | 113 rval_type = params['model_validation_functions'].get('return_type', None) |
106 | 114 |
107 if selected_function == 'cross_validate': | 115 if selected_function == 'cross_validate': |
108 res = validator(pipeline, X, y, **options) | 116 res = validator(pipeline, X, y, **options) |
109 rval = res[rval_type] | 117 rval = res[rval_type] |
110 elif selected_function == 'learning_curve': | 118 elif selected_function == 'learning_curve': |
112 train_sizes_abs, train_scores, test_scores = validator(pipeline, X, y, **options) | 120 train_sizes_abs, train_scores, test_scores = validator(pipeline, X, y, **options) |
113 rval = eval(rval_type) | 121 rval = eval(rval_type) |
114 elif selected_function == 'permutation_test_score': | 122 elif selected_function == 'permutation_test_score': |
115 score, permutation_scores, pvalue = validator(pipeline, X, y, **options) | 123 score, permutation_scores, pvalue = validator(pipeline, X, y, **options) |
116 rval = eval(rval_type) | 124 rval = eval(rval_type) |
117 if rval_type in ["score", "pvalue"]: | 125 if rval_type in ['score', 'pvalue']: |
118 rval = [rval] | 126 rval = [rval] |
119 elif selected_function == 'validation_curve': | 127 elif selected_function == 'validation_curve': |
120 options['param_name'] = 'estimator__' + options['param_name'] | 128 options['param_name'] = 'estimator__' + options['param_name'] |
121 options['param_range'] = eval(options['param_range']) | 129 options['param_range'] = eval(options['param_range']) |
122 train_scores, test_scores = validator(pipeline, X, y, **options) | 130 train_scores, test_scores = validator(pipeline, X, y, **options) |
123 rval = eval(rval_type) | 131 rval = eval(rval_type) |
124 else: | 132 else: |
125 rval = validator(pipeline, X, y, **options) | 133 rval = validator(pipeline, X, y, **options) |
126 | 134 |
127 rval = pandas.DataFrame(rval) | 135 rval = pandas.DataFrame(rval) |
128 rval.to_csv(path_or_buf="$outfile", sep='\t', header=False, index=False) | 136 rval.to_csv(path_or_buf='$outfile', sep='\t', header=False, index=False) |
129 | 137 |
130 ]]> | 138 ]]> |
131 </configfile> | 139 </configfile> |
132 </configfiles> | 140 </configfiles> |
133 <inputs> | 141 <inputs> |
149 <option value="No" selected="true"/> | 157 <option value="No" selected="true"/> |
150 <option value="Yes"/> | 158 <option value="Yes"/> |
151 </param> | 159 </param> |
152 <when value="No"/> | 160 <when value="No"/> |
153 <when value="Yes"> | 161 <when value="Yes"> |
154 <expand macro="feature_selection_all"> | 162 <expand macro="feature_selection_pipeline"/> |
155 <expand macro="fs_selectfrommodel_no_prefitted"/> | |
156 </expand> | |
157 </when> | 163 </when> |
158 </conditional> | 164 </conditional> |
159 <conditional name="model_validation_functions"> | 165 <conditional name="model_validation_functions"> |
160 <param name="selected_function" type="select" label="Select a model validation function"> | 166 <param name="selected_function" type="select" label="Select a model validation function"> |
161 <option value="cross_validate">cross_validate - Evaluate metric(s) by cross-validation and also record fit/score times</option> | 167 <option value="cross_validate">cross_validate - Evaluate metric(s) by cross-validation and also record fit/score times</option> |