Mercurial > repos > bgruening > sklearn_feature_selection
comparison feature_selection.xml @ 10:96f9b73327f2 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 76583c1fcd9d06a4679cc46ffaee44117b9e22cd
author | bgruening |
---|---|
date | Sat, 04 Aug 2018 12:35:10 -0400 |
parents | 537c6763c018 |
children | f8dfdb47508b |
comparison
equal
deleted
inserted
replaced
9:537c6763c018 | 10:96f9b73327f2 |
---|---|
17 <![CDATA[ | 17 <![CDATA[ |
18 import sys | 18 import sys |
19 import json | 19 import json |
20 import pandas | 20 import pandas |
21 import pickle | 21 import pickle |
22 import ast | |
22 import numpy as np | 23 import numpy as np |
24 import xgboost | |
23 import sklearn.feature_selection | 25 import sklearn.feature_selection |
24 from sklearn import svm, linear_model, ensemble | 26 from sklearn import svm, linear_model, ensemble, naive_bayes, tree, neighbors |
25 | 27 |
26 @COLUMNS_FUNCTION@ | 28 @COLUMNS_FUNCTION@ |
27 | 29 @GET_ESTIMATOR_FUNCTION@ |
28 @FEATURE_SELECTOR_FUNCTION@ | 30 @FEATURE_SELECTOR_FUNCTION@ |
29 | 31 |
30 input_json_path = sys.argv[1] | 32 input_json_path = sys.argv[1] |
31 with open(input_json_path, "r") as param_handler: | 33 with open(input_json_path, "r") as param_handler: |
32 params = json.load(param_handler) | 34 params = json.load(param_handler) |
33 | 35 |
34 ## Read features | 36 #handle cheetah |
37 #if $fs_algorithm_selector.selected_algorithm == "SelectFromModel"\ | |
38 and $fs_algorithm_selector.model_inputter.input_mode == "prefitted": | |
39 params['fs_algorithm_selector']['model_inputter']['fitted_estimator'] =\ | |
40 "$fs_algorithm_selector.model_inputter.fitted_estimator" | |
41 #end if | |
42 | |
43 # Read features | |
35 features_has_header = params["input_options"]["header1"] | 44 features_has_header = params["input_options"]["header1"] |
36 input_type = params["input_options"]["selected_input"] | 45 input_type = params["input_options"]["selected_input"] |
37 if input_type=="tabular": | 46 if input_type=="tabular": |
38 header = 'infer' if features_has_header else None | 47 header = 'infer' if features_has_header else None |
39 column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"] | 48 column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"] |
51 parse_dates=True | 60 parse_dates=True |
52 ) | 61 ) |
53 else: | 62 else: |
54 X = mmread("$input_options.infile1") | 63 X = mmread("$input_options.infile1") |
55 | 64 |
56 ## Read labels | 65 # Read labels |
57 header = 'infer' if params["input_options"]["header2"] else None | 66 header = 'infer' if params["input_options"]["header2"] else None |
58 column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] | 67 column_option = params["input_options"]["column_selector_options_2"]["selected_column_selector_option2"] |
59 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: | 68 if column_option in ["by_index_number", "all_but_by_index_number", "by_header_name", "all_but_by_header_name"]: |
60 c = params["input_options"]["column_selector_options_2"]["col2"] | 69 c = params["input_options"]["column_selector_options_2"]["col2"] |
61 else: | 70 else: |
68 header=header, | 77 header=header, |
69 parse_dates=True | 78 parse_dates=True |
70 ) | 79 ) |
71 y=y.ravel() | 80 y=y.ravel() |
72 | 81 |
73 ## Create feature selector | 82 # Create feature selector |
74 new_selector = feature_selector(params['feature_selection_algorithms']) | 83 new_selector = feature_selector(params['fs_algorithm_selector']) |
75 if params['feature_selection_algorithms']['selected_algorithm'] != 'SelectFromModel' or \ | 84 if params['fs_algorithm_selector']['selected_algorithm'] != 'SelectFromModel'\ |
76 'extra_estimator' not in params['feature_selection_algorithms'] or \ | 85 or params['fs_algorithm_selector']['model_inputter']['input_mode'] != 'prefitted' : |
77 params['feature_selection_algorithms']['extra_estimator']['has_estimator'] != 'no_load' : | |
78 new_selector.fit(X, y) | 86 new_selector.fit(X, y) |
79 | 87 |
80 ## Transform to select features | 88 ## Transform to select features |
81 selected_names = None | 89 selected_names = None |
82 if "$select_methods.selected_method" == "fit_transform": | 90 if "$output_method_selector.selected_method" == "fit_transform": |
83 res = new_selector.transform(X) | 91 res = new_selector.transform(X) |
84 if features_has_header: | 92 if features_has_header: |
85 selected_names = input_df.columns[new_selector.get_support(indices=True)] | 93 selected_names = input_df.columns[new_selector.get_support(indices=True)] |
86 else: | 94 else: |
87 res = new_selector.get_support(params["select_methods"]["indices"]) | 95 res = new_selector.get_support(params["output_method_selector"]["indices"]) |
88 | 96 |
89 res = pandas.DataFrame(res, columns = selected_names) | 97 res = pandas.DataFrame(res, columns = selected_names) |
90 res.to_csv(path_or_buf="$outfile", sep='\t', index=False) | 98 res.to_csv(path_or_buf="$outfile", sep='\t', index=False) |
91 | 99 |
92 | 100 |
93 ]]> | 101 ]]> |
94 </configfile> | 102 </configfile> |
95 </configfiles> | 103 </configfiles> |
96 <inputs> | 104 <inputs> |
97 <expand macro="feature_selection_all" /> | 105 <expand macro="feature_selection_all"> |
98 <expand macro="feature_selection_methods" /> | 106 <expand macro="fs_selectfrommodel_prefitted"/> |
107 </expand> | |
108 <expand macro="feature_selection_output_mothods" /> | |
99 <expand macro="sl_mixed_input"/> | 109 <expand macro="sl_mixed_input"/> |
100 </inputs> | 110 </inputs> |
101 <outputs> | 111 <outputs> |
102 <data format="tabular" name="outfile"/> | 112 <data format="tabular" name="outfile"/> |
103 </outputs> | 113 </outputs> |
104 <tests> | 114 <tests> |
105 <test> | 115 <test> |
106 <param name="selected_algorithm" value="SelectFromModel"/> | 116 <param name="selected_algorithm" value="SelectFromModel"/> |
107 <param name="has_estimator" value="no"/> | 117 <param name="input_mode" value="new"/> |
108 <param name="new_estimator" value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)"/> | 118 <param name="selected_module" value="ensemble"/> |
109 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 119 <param name="selected_estimator" value="RandomForestRegressor"/> |
110 <param name="header1" value="True"/> | 120 <param name="text_params" value="'n_estimators': 10, 'random_state': 10"/> |
111 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/> | 121 <param name="infile1" value="regression_train.tabular" ftype="tabular"/> |
112 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 122 <param name="header1" value="false"/> |
113 <param name="col2" value="1"/> | 123 <param name="col1" value="1,2,3,4,5"/> |
114 <param name="header2" value="True"/> | 124 <param name="infile2" value="regression_train.tabular" ftype="tabular"/> |
125 <param name="col2" value="6"/> | |
126 <param name="header2" value="false"/> | |
115 <output name="outfile" file="feature_selection_result01"/> | 127 <output name="outfile" file="feature_selection_result01"/> |
116 </test> | 128 </test> |
117 <test> | 129 <test> |
118 <param name="selected_algorithm" value="GenericUnivariateSelect"/> | 130 <param name="selected_algorithm" value="GenericUnivariateSelect"/> |
119 <param name="param" value="20"/> | 131 <param name="param" value="20"/> |
178 <param name="header2" value="True"/> | 190 <param name="header2" value="True"/> |
179 <output name="outfile" file="feature_selection_result07"/> | 191 <output name="outfile" file="feature_selection_result07"/> |
180 </test> | 192 </test> |
181 <test> | 193 <test> |
182 <param name="selected_algorithm" value="RFE"/> | 194 <param name="selected_algorithm" value="RFE"/> |
183 <param name="has_estimator" value="no"/> | 195 <param name="input_mode" value="new"/> |
184 <param name="new_estimator" value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)"/> | 196 <param name="selected_module" value="ensemble"/> |
185 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 197 <param name="selected_estimator" value="RandomForestRegressor"/> |
186 <param name="header1" value="True"/> | 198 <param name="text_params" value="'n_estimators': 10, 'random_state':10"/> |
187 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/> | 199 <param name="infile1" value="regression_train.tabular" ftype="tabular"/> |
188 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 200 <param name="header1" value="false"/> |
189 <param name="col2" value="1"/> | 201 <param name="col1" value="1,2,3,4,5"/> |
190 <param name="header2" value="True"/> | 202 <param name="infile2" value="regression_train.tabular" ftype="tabular"/> |
203 <param name="col2" value="6"/> | |
204 <param name="header2" value="false"/> | |
191 <output name="outfile" file="feature_selection_result08"/> | 205 <output name="outfile" file="feature_selection_result08"/> |
192 </test> | 206 </test> |
193 <test> | 207 <test> |
194 <param name="selected_algorithm" value="RFECV"/> | 208 <param name="selected_algorithm" value="RFECV"/> |
195 <param name="has_estimator" value="no"/> | 209 <param name="input_mode" value="new"/> |
196 <param name="new_estimator" value="ensemble.RandomForestRegressor(n_estimators = 1000, random_state = 42)"/> | 210 <param name="selected_module" value="ensemble"/> |
197 <param name="infile1" value="regression_X.tabular" ftype="tabular"/> | 211 <param name="selected_estimator" value="RandomForestRegressor"/> |
198 <param name="header1" value="True"/> | 212 <param name="text_params" value="'n_estimators': 10, 'random_state':10"/> |
199 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/> | 213 <param name="infile1" value="regression_train.tabular" ftype="tabular"/> |
200 <param name="infile2" value="regression_y.tabular" ftype="tabular"/> | 214 <param name="header1" value="false"/> |
201 <param name="col2" value="1"/> | 215 <param name="col1" value="1,2,3,4,5"/> |
202 <param name="header2" value="True"/> | 216 <param name="infile2" value="regression_train.tabular" ftype="tabular"/> |
217 <param name="col2" value="6"/> | |
218 <param name="header2" value="false"/> | |
203 <output name="outfile" file="feature_selection_result09"/> | 219 <output name="outfile" file="feature_selection_result09"/> |
204 </test> | 220 </test> |
205 <test> | 221 <test> |
206 <param name="selected_algorithm" value="VarianceThreshold"/> | 222 <param name="selected_algorithm" value="VarianceThreshold"/> |
207 <param name="threshold" value="0.1"/> | 223 <param name="threshold" value="0.1"/> |
223 <param name="infile2" value="test3.tabular" ftype="tabular"/> | 239 <param name="infile2" value="test3.tabular" ftype="tabular"/> |
224 <param name="header2" value="True"/> | 240 <param name="header2" value="True"/> |
225 <param name="selected_column_selector_option2" value="by_header_name"/> | 241 <param name="selected_column_selector_option2" value="by_header_name"/> |
226 <param name="col2" value="target"/> | 242 <param name="col2" value="target"/> |
227 <output name="outfile" file="feature_selection_result11"/> | 243 <output name="outfile" file="feature_selection_result11"/> |
244 </test> | |
245 <test> | |
246 <param name="selected_algorithm" value="SelectFromModel"/> | |
247 <param name="input_mode" value="prefitted"/> | |
248 <param name="fitted_estimator" value="rfr_model01" ftype="zip"/> | |
249 <param name="infile1" value="regression_train.tabular" ftype="tabular"/> | |
250 <param name="header1" value="false"/> | |
251 <param name="col1" value="1,2,3,4,5"/> | |
252 <param name="infile2" value="regression_train.tabular" ftype="tabular"/> | |
253 <param name="col2" value="1"/> | |
254 <param name="header2" value="false"/> | |
255 <output name="outfile" file="feature_selection_result12"/> | |
228 </test> | 256 </test> |
229 </tests> | 257 </tests> |
230 <help> | 258 <help> |
231 <![CDATA[ | 259 <![CDATA[ |
232 **What it does** | 260 **What it does** |