Mercurial > repos > bgruening > sklearn_model_validation
comparison model_validation.xml @ 8:fd7a054ffdbd draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit f54ff2ba2f8e7542d68966ce5a6b17d7f624ac48
author | bgruening |
---|---|
date | Fri, 13 Jul 2018 03:56:45 -0400 |
parents | 510aa2ce035e |
children | c6b3efcba7bd |
comparison
equal
deleted
inserted
replaced
7:57a7471292df | 8:fd7a054ffdbd |
---|---|
20 import pandas | 20 import pandas |
21 import ast | 21 import ast |
22 import pickle | 22 import pickle |
23 import numpy as np | 23 import numpy as np |
24 import sklearn.model_selection | 24 import sklearn.model_selection |
25 from sklearn import svm, linear_model, ensemble | 25 from sklearn import svm, linear_model, ensemble, preprocessing |
26 from sklearn.pipeline import Pipeline | 26 from sklearn.pipeline import Pipeline |
27 | 27 |
28 @COLUMNS_FUNCTION@ | 28 @COLUMNS_FUNCTION@ |
29 | 29 |
30 @FEATURE_SELECTOR_FUNCTION@ | 30 @FEATURE_SELECTOR_FUNCTION@ |
31 | 31 |
32 input_json_path = sys.argv[1] | 32 input_json_path = sys.argv[1] |
33 params = json.load(open(input_json_path, "r")) | 33 with open(input_json_path, "r") as param_handler: |
34 params = json.load(param_handler) | |
34 | 35 |
35 input_type = params["input_options"]["selected_input"] | 36 input_type = params["input_options"]["selected_input"] |
36 if input_type=="tabular": | 37 if input_type=="tabular": |
37 header = 'infer' if params["input_options"]["header1"] else None | 38 header = 'infer' if params["input_options"]["header1"] else None |
38 column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"] | 39 column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"] |
47 sep='\t', | 48 sep='\t', |
48 header=header, | 49 header=header, |
49 parse_dates=True | 50 parse_dates=True |
50 ) | 51 ) |
51 else: | 52 else: |
52 X = mmread(open("$input_options.infile1", 'r')) | 53 X = mmread("$input_options.infile1") |
53 | 54 |
54 header = 'infer' if params["input_options"]["header2"] else None | 55 header = 'infer' if params["input_options"]["header2"] else None |
55 column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] | 56 column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] |
56 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: | 57 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: |
57 c = params["input_options"]["column_selector_options_2"]["col2"] | 58 c = params["input_options"]["column_selector_options_2"]["col2"] |
73 if 'pre_dispatch' in options and options['pre_dispatch'] == '': | 74 if 'pre_dispatch' in options and options['pre_dispatch'] == '': |
74 options['pre_dispatch'] = None | 75 options['pre_dispatch'] = None |
75 | 76 |
76 pipeline_steps = [] | 77 pipeline_steps = [] |
77 | 78 |
79 ## Set up pre_processor and add to pipeline steps. | |
80 if params['pre_processing']['do_pre_processing'] == 'Yes': | |
81 preprocessor = params["pre_processing"]["pre_processors"]["selected_pre_processor"] | |
82 pre_processor_options = params["pre_processing"]["pre_processors"]["options"] | |
83 my_class = getattr(preprocessing, preprocessor) | |
84 pipeline_steps.append( ('pre_processor', my_class(**pre_processor_options)) ) | |
85 | |
78 ## Set up feature selector and add to pipeline steps. | 86 ## Set up feature selector and add to pipeline steps. |
79 if params['feature_selection']['do_feature_selection'] == 'Yes': | 87 if params['feature_selection']['do_feature_selection'] == 'Yes': |
80 feature_selector = feature_selector(params['feature_selection']['feature_selection_algorithms']) | 88 feature_selector = feature_selector(params['feature_selection']['feature_selection_algorithms']) |
81 pipeline_steps.append( ('feature_selector', feature_selector)) | 89 pipeline_steps.append( ('feature_selector', feature_selector) ) |
82 | 90 |
83 ## Set up estimator and add to pipeline. | 91 ## Set up estimator and add to pipeline. |
84 estimator=params["model_validation_functions"]["estimator"] | 92 estimator=params["model_validation_functions"]["estimator"] |
85 if params["model_validation_functions"]["extra_estimator"]["has_estimator"] == 'no': | 93 if params["model_validation_functions"]["extra_estimator"]["has_estimator"] == 'no': |
86 estimator = params["model_validation_functions"]["extra_estimator"]["new_estimator"] | 94 estimator = params["model_validation_functions"]["extra_estimator"]["new_estimator"] |
136 | 144 |
137 ]]> | 145 ]]> |
138 </configfile> | 146 </configfile> |
139 </configfiles> | 147 </configfiles> |
140 <inputs> | 148 <inputs> |
149 <conditional name="pre_processing"> | |
150 <param name="do_pre_processing" type="select" label="Do pre_processing?"> | |
151 <option value="No" selected="true"/> | |
152 <option value="Yes"/> | |
153 </param> | |
154 <when value="No"/> | |
155 <when value="Yes"> | |
156 <conditional name="pre_processors"> | |
157 <expand macro="sparse_preprocessors_ext" /> | |
158 <expand macro="sparse_preprocessor_options_ext" /> | |
159 </conditional> | |
160 </when> | |
161 </conditional> | |
141 <conditional name="feature_selection"> | 162 <conditional name="feature_selection"> |
142 <param name="do_feature_selection" type="select" label="Do feature selection?"> | 163 <param name="do_feature_selection" type="select" label="Do feature selection?"> |
143 <option value="No" selected="true"/> | 164 <option value="No" selected="true"/> |
144 <option value="Yes"/> | 165 <option value="Yes"/> |
145 </param> | 166 </param> |
350 <param name="header1" value="true" /> | 371 <param name="header1" value="true" /> |
351 <param name="selected_column_selector_option" value="all_columns"/> | 372 <param name="selected_column_selector_option" value="all_columns"/> |
352 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 373 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> |
353 <param name="header2" value="true" /> | 374 <param name="header2" value="true" /> |
354 <param name="selected_column_selector_option2" value="all_columns"/> | 375 <param name="selected_column_selector_option2" value="all_columns"/> |
355 <output name="outfile" file="mv_result07.tabular"/> | 376 <output name="outfile" > |
377 <assert_contents> | |
378 <has_line line="0.7824428015300172" /> | |
379 </assert_contents> | |
380 </output> | |
381 </test> | |
382 <test> | |
383 <param name="do_pre_processing" value="Yes"/> | |
384 <param name="selected_pre_processor" value="RobustScaler"/> | |
385 <param name="do_feature_selection" value="Yes"/> | |
386 <param name="selected_algorithm" value="SelectKBest"/> | |
387 <param name="score_func" value="f_classif"/> | |
388 <param name="selected_function" value="GridSearchCV"/> | |
389 <param name="estimator" value="svm.SVR(kernel=&quot;linear&quot;)"/> | |
390 <param name="has_estimator" value="yes"/> | |
391 <param name="param_grid" value="[{'feature_selector__k': [3, 5, 7, 9], 'estimator__C': [1, 10, 100, 1000]}]"/> | |
392 <param name="return_type" value="best_score_"/> | |
393 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | |
394 <param name="header1" value="true" /> | |
395 <param name="selected_column_selector_option" value="all_columns"/> | |
396 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | |
397 <param name="header2" value="true" /> | |
398 <param name="selected_column_selector_option2" value="all_columns"/> | |
399 <output name="outfile" > | |
400 <assert_contents> | |
401 <has_line line="0.7938837807353147" /> | |
402 </assert_contents> | |
403 </output> | |
404 </test> | |
405 <test> | |
406 <param name="do_pre_processing" value="Yes"/> | |
407 <param name="selected_pre_processor" value="RobustScaler"/> | |
408 <param name="selected_function" value="GridSearchCV"/> | |
409 <param name="estimator" value="svm.SVR(kernel=&quot;linear&quot;)"/> | |
410 <param name="has_estimator" value="yes"/> | |
411 <param name="param_grid" value="[{'estimator__C': [1, 10, 100, 1000]}]"/> | |
412 <param name="return_type" value="best_score_"/> | |
413 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | |
414 <param name="header1" value="true" /> | |
415 <param name="selected_column_selector_option" value="all_columns"/> | |
416 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | |
417 <param name="header2" value="true" /> | |
418 <param name="selected_column_selector_option2" value="all_columns"/> | |
419 <output name="outfile" > | |
420 <assert_contents> | |
421 <has_line line="0.7904476204861263" /> | |
422 </assert_contents> | |
423 </output> | |
356 </test> | 424 </test> |
357 </tests> | 425 </tests> |
358 <help> | 426 <help> |
359 <![CDATA[ | 427 <![CDATA[ |
360 **What it does** | 428 **What it does** |