comparison feature_selection.xml @ 35:61edd9e5c17f draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 9981e25b00de29ed881b2229a173a8c812ded9bb
author bgruening
date Wed, 09 Aug 2023 13:10:57 +0000
parents 93f3b307485f
children
comparison
equal deleted inserted replaced
34:4483b84310ec 35:61edd9e5c17f
1 <tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@" profile="20.05"> 1 <tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@" profile="@PROFILE@">
2 <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description> 2 <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description>
3 <macros> 3 <macros>
4 <import>main_macros.xml</import> 4 <import>main_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="python_requirements" /> 6 <expand macro="python_requirements" />
29 model_selection, naive_bayes, neighbors, pipeline, preprocessing, 29 model_selection, naive_bayes, neighbors, pipeline, preprocessing,
30 svm, linear_model, tree, discriminant_analysis) 30 svm, linear_model, tree, discriminant_analysis)
31 from imblearn.pipeline import Pipeline as imbPipeline 31 from imblearn.pipeline import Pipeline as imbPipeline
32 from sklearn.pipeline import Pipeline 32 from sklearn.pipeline import Pipeline
33 33
34 from galaxy_ml.model_persist import dump_model_to_h5
34 from galaxy_ml.utils import (SafeEval, feature_selector, 35 from galaxy_ml.utils import (SafeEval, feature_selector,
35 read_columns, get_module) 36 read_columns, get_module)
36 37
37 38
38 warnings.simplefilter('ignore') 39 warnings.simplefilter('ignore')
78 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: 79 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
79 c = params['input_options']['column_selector_options_1']['col1'] 80 c = params['input_options']['column_selector_options_1']['col1']
80 else: 81 else:
81 c = None 82 c = None
82 X, input_df = read_columns( 83 X, input_df = read_columns(
83 '$input_options.infile1', 84 '$input_options.infile1',
84 c = c, 85 c = c,
85 c_option = column_option, 86 c_option = column_option,
86 return_df = True, 87 return_df = True,
87 sep='\t', 88 sep='\t',
88 header=header, 89 header=header,
89 parse_dates=True) 90 parse_dates=True,
91 )
90 X = X.astype(float) 92 X = X.astype(float)
91 #elif $input_options.selected_input == 'seq_fasta' 93 #elif $input_options.selected_input == 'seq_fasta'
92 fasta_file = '$input_options.fasta_file' 94 fasta_file = '$input_options.fasta_file'
93 pyfaidx = get_module('pyfaidx') 95 pyfaidx = get_module('pyfaidx')
94 sequences = pyfaidx.Fasta(fasta_file) 96 sequences = pyfaidx.Fasta(fasta_file)
116 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: 118 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
117 c = params['input_options']['column_selector_options_2']['col2'] 119 c = params['input_options']['column_selector_options_2']['col2']
118 else: 120 else:
119 c = None 121 c = None
120 y = read_columns( 122 y = read_columns(
121 '$input_options.infile2', 123 '$input_options.infile2',
122 c = c, 124 c = c,
123 c_option = column_option, 125 c_option = column_option,
124 sep='\t', 126 sep='\t',
125 header=header, 127 header=header,
126 parse_dates=True) 128 parse_dates=True,
129 )
127 y = y.ravel() 130 y = y.ravel()
128 131
129 ## Create feature selector 132 ## Create feature selector
130 new_selector = feature_selector(params['fs_algorithm_selector'], X=X, y=y) 133 new_selector = feature_selector(params['fs_algorithm_selector'], X=X, y=y)
131 if params['fs_algorithm_selector']['selected_algorithm'] != 'SelectFromModel'\ 134 if params['fs_algorithm_selector']['selected_algorithm'] != 'SelectFromModel'\
140 selected_names = input_df.columns[new_selector.get_support(indices=True)] 143 selected_names = input_df.columns[new_selector.get_support(indices=True)]
141 res = pandas.DataFrame(res, columns = selected_names) 144 res = pandas.DataFrame(res, columns = selected_names)
142 res.to_csv(path_or_buf='$outfile', sep='\t', index=False) 145 res.to_csv(path_or_buf='$outfile', sep='\t', index=False)
143 146
144 #if $save: 147 #if $save:
145 with open('$outfile_selector', 'wb') as output_handler: 148 dump_model_to_h5(new_selector, '$outfile_selector')
146 pickle.dump(new_selector, output_handler, pickle.HIGHEST_PROTOCOL)
147 #end if 149 #end if
148 150
149 ]]> 151 ]]>
150 </configfile> 152 </configfile>
151 </configfiles> 153 </configfiles>
154 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Save the fitted selector?" /> 156 <param name="save" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Save the fitted selector?" />
155 <expand macro="sl_mixed_input_plus_sequence" /> 157 <expand macro="sl_mixed_input_plus_sequence" />
156 </inputs> 158 </inputs>
157 <outputs> 159 <outputs>
158 <data format="tabular" name="outfile" /> 160 <data format="tabular" name="outfile" />
159 <data format="zip" name="outfile_selector" label="${fs_algorithm_selector.selected_algorithm}"> 161 <data format="h5mlm" name="outfile_selector" label="${fs_algorithm_selector.selected_algorithm}">
160 <filter>save</filter> 162 <filter>save</filter>
161 </data> 163 </data>
162 </outputs> 164 </outputs>
163 <tests> 165 <tests>
164 <test> 166 <test>
292 <output name="outfile" file="feature_selection_result11" /> 294 <output name="outfile" file="feature_selection_result11" />
293 </test> 295 </test>
294 <test> 296 <test>
295 <param name="selected_algorithm" value="SelectFromModel" /> 297 <param name="selected_algorithm" value="SelectFromModel" />
296 <param name="input_mode" value="prefitted" /> 298 <param name="input_mode" value="prefitted" />
297 <param name="fitted_estimator" value="rfr_model01" ftype="zip" /> 299 <param name="fitted_estimator" value="searchCV03" ftype="h5mlm" />
298 <param name="infile1" value="regression_train.tabular" ftype="tabular" /> 300 <param name="infile1" value="regression_X.tabular" ftype="tabular" />
299 <param name="header1" value="false" /> 301 <param name="header1" value="true" />
300 <param name="col1" value="1,2,3,4,5" /> 302 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17" />
301 <param name="infile2" value="regression_train.tabular" ftype="tabular" /> 303 <param name="infile2" value="regression_y.tabular" ftype="tabular" />
302 <param name="col2" value="1" /> 304 <param name="col2" value="1" />
303 <param name="header2" value="false" /> 305 <param name="header2" value="true" />
304 <output name="outfile" file="feature_selection_result12" /> 306 <output name="outfile" file="feature_selection_result12" />
305 </test> 307 </test>
306 <test> 308 <test>
307 <param name="selected_algorithm" value="RFECV" /> 309 <param name="selected_algorithm" value="RFECV" />
308 <param name="input_mode" value="new" /> 310 <param name="input_mode" value="new" />