comparison model_validation.xml @ 8:fd7a054ffdbd draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit f54ff2ba2f8e7542d68966ce5a6b17d7f624ac48
author bgruening
date Fri, 13 Jul 2018 03:56:45 -0400
parents 510aa2ce035e
children c6b3efcba7bd
comparison
equal deleted inserted replaced
7:57a7471292df 8:fd7a054ffdbd
20 import pandas 20 import pandas
21 import ast 21 import ast
22 import pickle 22 import pickle
23 import numpy as np 23 import numpy as np
24 import sklearn.model_selection 24 import sklearn.model_selection
25 from sklearn import svm, linear_model, ensemble 25 from sklearn import svm, linear_model, ensemble, preprocessing
26 from sklearn.pipeline import Pipeline 26 from sklearn.pipeline import Pipeline
27 27
28 @COLUMNS_FUNCTION@ 28 @COLUMNS_FUNCTION@
29 29
30 @FEATURE_SELECTOR_FUNCTION@ 30 @FEATURE_SELECTOR_FUNCTION@
31 31
32 input_json_path = sys.argv[1] 32 input_json_path = sys.argv[1]
33 params = json.load(open(input_json_path, "r")) 33 with open(input_json_path, "r") as param_handler:
34 params = json.load(param_handler)
34 35
35 input_type = params["input_options"]["selected_input"] 36 input_type = params["input_options"]["selected_input"]
36 if input_type=="tabular": 37 if input_type=="tabular":
37 header = 'infer' if params["input_options"]["header1"] else None 38 header = 'infer' if params["input_options"]["header1"] else None
38 column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"] 39 column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"]
47 sep='\t', 48 sep='\t',
48 header=header, 49 header=header,
49 parse_dates=True 50 parse_dates=True
50 ) 51 )
51 else: 52 else:
52 X = mmread(open("$input_options.infile1", 'r')) 53 X = mmread("$input_options.infile1")
53 54
54 header = 'infer' if params["input_options"]["header2"] else None 55 header = 'infer' if params["input_options"]["header2"] else None
55 column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] 56 column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"]
56 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: 57 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]:
57 c = params["input_options"]["column_selector_options_2"]["col2"] 58 c = params["input_options"]["column_selector_options_2"]["col2"]
73 if 'pre_dispatch' in options and options['pre_dispatch'] == '': 74 if 'pre_dispatch' in options and options['pre_dispatch'] == '':
74 options['pre_dispatch'] = None 75 options['pre_dispatch'] = None
75 76
76 pipeline_steps = [] 77 pipeline_steps = []
77 78
79 ## Set up pre_processor and add to pipeline steps.
80 if params['pre_processing']['do_pre_processing'] == 'Yes':
81 preprocessor = params["pre_processing"]["pre_processors"]["selected_pre_processor"]
82 pre_processor_options = params["pre_processing"]["pre_processors"]["options"]
83 my_class = getattr(preprocessing, preprocessor)
84 pipeline_steps.append( ('pre_processor', my_class(**pre_processor_options)) )
85
78 ## Set up feature selector and add to pipeline steps. 86 ## Set up feature selector and add to pipeline steps.
79 if params['feature_selection']['do_feature_selection'] == 'Yes': 87 if params['feature_selection']['do_feature_selection'] == 'Yes':
80 feature_selector = feature_selector(params['feature_selection']['feature_selection_algorithms']) 88 feature_selector = feature_selector(params['feature_selection']['feature_selection_algorithms'])
81 pipeline_steps.append( ('feature_selector', feature_selector)) 89 pipeline_steps.append( ('feature_selector', feature_selector) )
82 90
83 ## Set up estimator and add to pipeline. 91 ## Set up estimator and add to pipeline.
84 estimator=params["model_validation_functions"]["estimator"] 92 estimator=params["model_validation_functions"]["estimator"]
85 if params["model_validation_functions"]["extra_estimator"]["has_estimator"] == 'no': 93 if params["model_validation_functions"]["extra_estimator"]["has_estimator"] == 'no':
86 estimator = params["model_validation_functions"]["extra_estimator"]["new_estimator"] 94 estimator = params["model_validation_functions"]["extra_estimator"]["new_estimator"]
136 144
137 ]]> 145 ]]>
138 </configfile> 146 </configfile>
139 </configfiles> 147 </configfiles>
140 <inputs> 148 <inputs>
149 <conditional name="pre_processing">
150 <param name="do_pre_processing" type="select" label="Do pre_processing?">
151 <option value="No" selected="true"/>
152 <option value="Yes"/>
153 </param>
154 <when value="No"/>
155 <when value="Yes">
156 <conditional name="pre_processors">
157 <expand macro="sparse_preprocessors_ext" />
158 <expand macro="sparse_preprocessor_options_ext" />
159 </conditional>
160 </when>
161 </conditional>
141 <conditional name="feature_selection"> 162 <conditional name="feature_selection">
142 <param name="do_feature_selection" type="select" label="Do feature selection?"> 163 <param name="do_feature_selection" type="select" label="Do feature selection?">
143 <option value="No" selected="true"/> 164 <option value="No" selected="true"/>
144 <option value="Yes"/> 165 <option value="Yes"/>
145 </param> 166 </param>
350 <param name="header1" value="true" /> 371 <param name="header1" value="true" />
351 <param name="selected_column_selector_option" value="all_columns"/> 372 <param name="selected_column_selector_option" value="all_columns"/>
352 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> 373 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
353 <param name="header2" value="true" /> 374 <param name="header2" value="true" />
354 <param name="selected_column_selector_option2" value="all_columns"/> 375 <param name="selected_column_selector_option2" value="all_columns"/>
355 <output name="outfile" file="mv_result07.tabular"/> 376 <output name="outfile" >
377 <assert_contents>
378 <has_line line="0.7824428015300172" />
379 </assert_contents>
380 </output>
381 </test>
382 <test>
383 <param name="do_pre_processing" value="Yes"/>
384 <param name="selected_pre_processor" value="RobustScaler"/>
385 <param name="do_feature_selection" value="Yes"/>
386 <param name="selected_algorithm" value="SelectKBest"/>
387 <param name="score_func" value="f_classif"/>
388 <param name="selected_function" value="GridSearchCV"/>
389 <param name="estimator" value="svm.SVR(kernel=&quot;linear&quot;)"/>
390 <param name="has_estimator" value="yes"/>
391 <param name="param_grid" value="[{'feature_selector__k': [3, 5, 7, 9], 'estimator__C': [1, 10, 100, 1000]}]"/>
392 <param name="return_type" value="best_score_"/>
393 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
394 <param name="header1" value="true" />
395 <param name="selected_column_selector_option" value="all_columns"/>
396 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
397 <param name="header2" value="true" />
398 <param name="selected_column_selector_option2" value="all_columns"/>
399 <output name="outfile" >
400 <assert_contents>
401 <has_line line="0.7938837807353147" />
402 </assert_contents>
403 </output>
404 </test>
405 <test>
406 <param name="do_pre_processing" value="Yes"/>
407 <param name="selected_pre_processor" value="RobustScaler"/>
408 <param name="selected_function" value="GridSearchCV"/>
409 <param name="estimator" value="svm.SVR(kernel=&quot;linear&quot;)"/>
410 <param name="has_estimator" value="yes"/>
411 <param name="param_grid" value="[{'estimator__C': [1, 10, 100, 1000]}]"/>
412 <param name="return_type" value="best_score_"/>
413 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
414 <param name="header1" value="true" />
415 <param name="selected_column_selector_option" value="all_columns"/>
416 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
417 <param name="header2" value="true" />
418 <param name="selected_column_selector_option2" value="all_columns"/>
419 <output name="outfile" >
420 <assert_contents>
421 <has_line line="0.7904476204861263" />
422 </assert_contents>
423 </output>
356 </test> 424 </test>
357 </tests> 425 </tests>
358 <help> 426 <help>
359 <![CDATA[ 427 <![CDATA[
360 **What it does** 428 **What it does**