comparison feature_selection.xml @ 18:ec25331946b8 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit c0a3a186966888e5787335a7628bf0a4382637e7
author bgruening
date Tue, 14 May 2019 18:17:57 -0400
parents 2bbbac61e48d
children 0b88494bdcac
comparison
equal deleted inserted replaced
17:2bbbac61e48d 18:ec25331946b8
2 <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description> 2 <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description>
3 <macros> 3 <macros>
4 <import>main_macros.xml</import> 4 <import>main_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="python_requirements"/> 6 <expand macro="python_requirements"/>
7 <!--TODO: Add imblearn package support-->
7 <expand macro="macro_stdio"/> 8 <expand macro="macro_stdio"/>
8 <version_command>echo "@VERSION@"</version_command> 9 <version_command>echo "@VERSION@"</version_command>
9 <command> 10 <command>
10 <![CDATA[ 11 <![CDATA[
11 python "$feature_selection_script" '$inputs' 12 python "$feature_selection_script" '$inputs'
15 <inputs name="inputs" /> 16 <inputs name="inputs" />
16 <configfile name="feature_selection_script"> 17 <configfile name="feature_selection_script">
17 <![CDATA[ 18 <![CDATA[
18 import json 19 import json
19 import sklearn.feature_selection 20 import sklearn.feature_selection
20 21 import skrebate
21 with open('$__tool_directory__/sk_whitelist.json', 'r') as f: 22 import pandas
22 sk_whitelist = json.load(f) 23 import sys
23 exec(open('$__tool_directory__/utils.py').read(), globals()) 24 import warnings
25 import xgboost
26 from sklearn import (
27 cluster, compose, decomposition, ensemble, feature_extraction,
28 feature_selection, gaussian_process, kernel_approximation, metrics,
29 model_selection, naive_bayes, neighbors, pipeline, preprocessing,
30 svm, linear_model, tree, discriminant_analysis)
31 from imblearn.pipeline import Pipeline as imbPipeline
32 from sklearn.pipeline import Pipeline
33
34 sys.path.insert(0, '$__tool_directory__')
35 from utils import SafeEval, feature_selector, read_columns
24 36
25 warnings.simplefilter('ignore') 37 warnings.simplefilter('ignore')
26 38
27 safe_eval = SafeEval() 39 safe_eval = SafeEval()
28 40
29 input_json_path = sys.argv[1] 41 input_json_path = sys.argv[1]
30 with open(input_json_path, 'r') as param_handler: 42 with open(input_json_path, 'r') as param_handler:
31 params = json.load(param_handler) 43 params = json.load(param_handler)
32 44
33 #handle cheetah 45 ## handle cheetah
34 #if $fs_algorithm_selector.selected_algorithm == 'SelectFromModel'\ 46 #if $fs_algorithm_selector.selected_algorithm == 'SelectFromModel'\
35 and $fs_algorithm_selector.model_inputter.input_mode == 'prefitted': 47 and $fs_algorithm_selector.model_inputter.input_mode == 'prefitted':
36 params['fs_algorithm_selector']['model_inputter']['fitted_estimator'] =\ 48 params['fs_algorithm_selector']['model_inputter']['fitted_estimator'] =\
37 '$fs_algorithm_selector.model_inputter.fitted_estimator' 49 '$fs_algorithm_selector.model_inputter.fitted_estimator'
38 #end if 50 #end if
39 51
40 #if $fs_algorithm_selector.selected_algorithm == 'SelectFromModel'\ 52 #if $fs_algorithm_selector.selected_algorithm == 'SelectFromModel'\
41 and $fs_algorithm_selector.model_inputter.input_mode == 'new'\ 53 and $fs_algorithm_selector.model_inputter.input_mode == 'new'\
42 and $fs_algorithm_selector.model_inputter.estimator_selector.selected_module == 'customer_estimator': 54 and $fs_algorithm_selector.model_inputter.estimator_selector.selected_module == 'custom_estimator':
43 params['fs_algorithm_selector']['model_inputter']['estimator_selector']['c_estimator'] =\ 55 params['fs_algorithm_selector']['model_inputter']['estimator_selector']['c_estimator'] =\
44 '$fs_algorithm_selector.model_inputter.estimator_selector.c_estimator' 56 '$fs_algorithm_selector.model_inputter.estimator_selector.c_estimator'
45 #end if 57 #end if
46 58
47 #if $fs_algorithm_selector.selected_algorithm in ['RFE', 'RFECV']\ 59 #if $fs_algorithm_selector.selected_algorithm in ['RFE', 'RFECV', 'DyRFECV']\
48 and $fs_algorithm_selector.estimator_selector.selected_module == 'customer_estimator': 60 and $fs_algorithm_selector.estimator_selector.selected_module == 'custom_estimator':
49 params['fs_algorithm_selector']['estimator_selector']['c_estimator'] =\ 61 params['fs_algorithm_selector']['estimator_selector']['c_estimator'] =\
50 '$fs_algorithm_selector.estimator_selector.c_estimator' 62 '$fs_algorithm_selector.estimator_selector.c_estimator'
51 #end if 63 #end if
52 64
53 # Read features 65 #if $fs_algorithm_selector.selected_algorithm in ['RFECV', 'DyRFECV']\
66 and $fs_algorithm_selector.options.cv_selector.selected_cv\
67 in ['GroupKFold', 'GroupShuffleSplit', 'LeaveOneGroupOut', 'LeavePGroupsOut']:
68 params['fs_algorithm_selector']['options']['cv_selector']['groups_selector']['infile_g'] =\
69 '$fs_algorithm_selector.options.cv_selector.groups_selector.infile_g'
70 #end if
71
72 ## Read features
54 features_has_header = params['input_options']['header1'] 73 features_has_header = params['input_options']['header1']
55 input_type = params['input_options']['selected_input'] 74 input_type = params['input_options']['selected_input']
56 if input_type == 'tabular': 75 if input_type == 'tabular':
57 header = 'infer' if features_has_header else None 76 header = 'infer' if features_has_header else None
58 column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option'] 77 column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option']
65 c = c, 84 c = c,
66 c_option = column_option, 85 c_option = column_option,
67 return_df = True, 86 return_df = True,
68 sep='\t', 87 sep='\t',
69 header=header, 88 header=header,
70 parse_dates=True 89 parse_dates=True)
71 ) 90 X = X.astype(float)
72 else: 91 else:
73 X = mmread('$input_options.infile1') 92 X = mmread('$input_options.infile1')
74 93
75 # Read labels 94 ## Read labels
76 header = 'infer' if params['input_options']['header2'] else None 95 header = 'infer' if params['input_options']['header2'] else None
77 column_option = params['input_options']['column_selector_options_2']['selected_column_selector_option2'] 96 column_option = params['input_options']['column_selector_options_2']['selected_column_selector_option2']
78 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: 97 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
79 c = params['input_options']['column_selector_options_2']['col2'] 98 c = params['input_options']['column_selector_options_2']['col2']
80 else: 99 else:
83 '$input_options.infile2', 102 '$input_options.infile2',
84 c = c, 103 c = c,
85 c_option = column_option, 104 c_option = column_option,
86 sep='\t', 105 sep='\t',
87 header=header, 106 header=header,
88 parse_dates=True 107 parse_dates=True)
89 ) 108 y = y.ravel()
90 y=y.ravel() 109
91 110 ## Create feature selector
92 # Create feature selector 111 new_selector = feature_selector(params['fs_algorithm_selector'], X=X, y=y)
93 new_selector = feature_selector(params['fs_algorithm_selector'])
94 if params['fs_algorithm_selector']['selected_algorithm'] != 'SelectFromModel'\ 112 if params['fs_algorithm_selector']['selected_algorithm'] != 'SelectFromModel'\
95 or params['fs_algorithm_selector']['model_inputter']['input_mode'] != 'prefitted' : 113 or params['fs_algorithm_selector']['model_inputter']['input_mode'] != 'prefitted' :
96 new_selector.fit(X, y) 114 new_selector.fit(X, y)
97 115
98 ## Transform to select features 116 ## Transform to select features
264 <param name="infile2" value="regression_train.tabular" ftype="tabular"/> 282 <param name="infile2" value="regression_train.tabular" ftype="tabular"/>
265 <param name="col2" value="1"/> 283 <param name="col2" value="1"/>
266 <param name="header2" value="false"/> 284 <param name="header2" value="false"/>
267 <output name="outfile" file="feature_selection_result12"/> 285 <output name="outfile" file="feature_selection_result12"/>
268 </test> 286 </test>
287 <test>
288 <param name="selected_algorithm" value="RFECV"/>
289 <param name="input_mode" value="new"/>
290 <param name="selected_module" value="ensemble"/>
291 <param name="selected_estimator" value="RandomForestRegressor"/>
292 <param name="text_params" value="n_estimators=10, random_state=10"/>
293 <section name="groups_selector">
294 <param name="infile_groups" value="regression_y.tabular" ftype="tabular"/>
295 <param name="header_g" value="true"/>
296 <param name="selected_column_selector_option_g" value="by_index_number"/>
297 <param name="col_g" value="1"/>
298 </section>
299 <param name="selected_cv" value="GroupShuffleSplit"/>
300 <param name="random_state" value="0"/>
301 <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
302 <param name="header1" value="true"/>
303 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/>
304 <param name="infile2" value="regression_y.tabular" ftype="tabular"/>
305 <param name="col2" value="1"/>
306 <param name="header2" value="true"/>
307 <output name="outfile" file="feature_selection_result13"/>
308 </test>
269 </tests> 309 </tests>
270 <help> 310 <help>
271 <![CDATA[ 311 <![CDATA[
272 **What it does** 312 **What it does**
273 This tool provides several loss, score, and utility functions to measure classification performance. Some metrics might require probability estimates of the positive class, confidence values, or binary decision values. This tool is based on 313 This tool provides several loss, score, and utility functions to measure classification performance. Some metrics might require probability estimates of the positive class, confidence values, or binary decision values. This tool is based on