Mercurial > repos > bgruening > sklearn_feature_selection
comparison feature_selection.xml @ 35:61edd9e5c17f draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author | bgruening |
---|---|
date | Wed, 09 Aug 2023 13:10:57 +0000 |
parents | 93f3b307485f |
children |
comparison
equal
deleted
inserted
replaced
34:4483b84310ec | 35:61edd9e5c17f |
---|---|
1 <tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@" profile="20.05"> | 1 <tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@" profile="@PROFILE@"> |
2 <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description> | 2 <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description> |
3 <macros> | 3 <macros> |
4 <import>main_macros.xml</import> | 4 <import>main_macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="python_requirements" /> | 6 <expand macro="python_requirements" /> |
29 model_selection, naive_bayes, neighbors, pipeline, preprocessing, | 29 model_selection, naive_bayes, neighbors, pipeline, preprocessing, |
30 svm, linear_model, tree, discriminant_analysis) | 30 svm, linear_model, tree, discriminant_analysis) |
31 from imblearn.pipeline import Pipeline as imbPipeline | 31 from imblearn.pipeline import Pipeline as imbPipeline |
32 from sklearn.pipeline import Pipeline | 32 from sklearn.pipeline import Pipeline |
33 | 33 |
| | 34 from galaxy_ml.model_persist import dump_model_to_h5 |
34 from galaxy_ml.utils import (SafeEval, feature_selector, | 35 from galaxy_ml.utils import (SafeEval, feature_selector, |
35 read_columns, get_module) | 36 read_columns, get_module) |
36 | 37 |
37 | 38 |
38 warnings.simplefilter('ignore') | 39 warnings.simplefilter('ignore') |
78 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: | 79 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: |
79 c = params['input_options']['column_selector_options_1']['col1'] | 80 c = params['input_options']['column_selector_options_1']['col1'] |
80 else: | 81 else: |
81 c = None | 82 c = None |
82 X, input_df = read_columns( | 83 X, input_df = read_columns( |
83 '$input_options.infile1', | 84 '$input_options.infile1', |
84 c = c, | 85 c = c, |
85 c_option = column_option, | 86 c_option = column_option, |
86 return_df = True, | 87 return_df = True, |
87 sep='\t', | 88 sep='\t', |
88 header=header, | 89 header=header, |
89 parse_dates=True) | 90 parse_dates=True, |
| | 91 ) |
90 X = X.astype(float) | 92 X = X.astype(float) |
91 #elif $input_options.selected_input == 'seq_fasta' | 93 #elif $input_options.selected_input == 'seq_fasta' |
92 fasta_file = '$input_options.fasta_file' | 94 fasta_file = '$input_options.fasta_file' |
93 pyfaidx = get_module('pyfaidx') | 95 pyfaidx = get_module('pyfaidx') |
94 sequences = pyfaidx.Fasta(fasta_file) | 96 sequences = pyfaidx.Fasta(fasta_file) |
116 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: | 118 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: |
117 c = params['input_options']['column_selector_options_2']['col2'] | 119 c = params['input_options']['column_selector_options_2']['col2'] |
118 else: | 120 else: |
119 c = None | 121 c = None |
120 y = read_columns( | 122 y = read_columns( |
121 '$input_options.infile2', | 123 '$input_options.infile2', |
122 c = c, | 124 c = c, |
123 c_option = column_option, | 125 c_option = column_option, |
124 sep='\t', | 126 sep='\t', |
125 header=header, | 127 header=header, |
126 parse_dates=True) | 128 parse_dates=True, |
| | 129 ) |
127 y = y.ravel() | 130 y = y.ravel() |
128 | 131 |
129 ## Create feature selector | 132 ## Create feature selector |
130 new_selector = feature_selector(params['fs_algorithm_selector'], X=X, y=y) | 133 new_selector = feature_selector(params['fs_algorithm_selector'], X=X, y=y) |
131 if params['fs_algorithm_selector']['selected_algorithm'] != 'SelectFromModel'\ | 134 if params['fs_algorithm_selector']['selected_algorithm'] != 'SelectFromModel'\ |
140 selected_names = input_df.columns[new_selector.get_support(indices=True)] | 143 selected_names = input_df.columns[new_selector.get_support(indices=True)] |
141 res = pandas.DataFrame(res, columns = selected_names) | 144 res = pandas.DataFrame(res, columns = selected_names) |
142 res.to_csv(path_or_buf='$outfile', sep='\t', index=False) | 145 res.to_csv(path_or_buf='$outfile', sep='\t', index=False) |
143 | 146 |
144 #if $save: | 147 #if $save: |
145 with open('$outfile_selector', 'wb') as output_handler: | 148 dump_model_to_h5(new_selector, '$outfile_selector') |
146 pickle.dump(new_selector, output_handler, pickle.HIGHEST_PROTOCOL) | |
147 #end if | 149 #end if |
148 | 150 |
149 ]]> | 151 ]]> |
150 </configfile> | 152 </configfile> |
151 </configfiles> | 153 </configfiles> |
154 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Save the fitted selector?" /> | 156 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Save the fitted selector?" /> |
155 <expand macro="sl_mixed_input_plus_sequence" /> | 157 <expand macro="sl_mixed_input_plus_sequence" /> |
156 </inputs> | 158 </inputs> |
157 <outputs> | 159 <outputs> |
158 <data format="tabular" name="outfile" /> | 160 <data format="tabular" name="outfile" /> |
159 <data format="zip" name="outfile_selector" label="${fs_algorithm_selector.selected_algorithm}"> | 161 <data format="h5mlm" name="outfile_selector" label="${fs_algorithm_selector.selected_algorithm}"> |
160 <filter>save</filter> | 162 <filter>save</filter> |
161 </data> | 163 </data> |
162 </outputs> | 164 </outputs> |
163 <tests> | 165 <tests> |
164 <test> | 166 <test> |
292 <output name="outfile" file="feature_selection_result11" /> | 294 <output name="outfile" file="feature_selection_result11" /> |
293 </test> | 295 </test> |
294 <test> | 296 <test> |
295 <param name="selected_algorithm" value="SelectFromModel" /> | 297 <param name="selected_algorithm" value="SelectFromModel" /> |
296 <param name="input_mode" value="prefitted" /> | 298 <param name="input_mode" value="prefitted" /> |
297 <param name="fitted_estimator" value="rfr_model01" ftype="zip" /> | 299 <param name="fitted_estimator" value="searchCV03" ftype="h5mlm" /> |
298 <param name="infile1" value="regression_train.tabular" ftype="tabular" /> | 300 <param name="infile1" value="regression_X.tabular" ftype="tabular" /> |
299 <param name="header1" value="false" /> | 301 <param name="header1" value="true" /> |
300 <param name="col1" value="1,2,3,4,5" /> | 302 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17" /> |
301 <param name="infile2" value="regression_train.tabular" ftype="tabular" /> | 303 <param name="infile2" value="regression_y.tabular" ftype="tabular" /> |
302 <param name="col2" value="1" /> | 304 <param name="col2" value="1" /> |
303 <param name="header2" value="false" /> | 305 <param name="header2" value="true" /> |
304 <output name="outfile" file="feature_selection_result12" /> | 306 <output name="outfile" file="feature_selection_result12" /> |
305 </test> | 307 </test> |
306 <test> | 308 <test> |
307 <param name="selected_algorithm" value="RFECV" /> | 309 <param name="selected_algorithm" value="RFECV" /> |
308 <param name="input_mode" value="new" /> | 310 <param name="input_mode" value="new" /> |