Mercurial > repos > bgruening > sklearn_feature_selection
diff feature_selection.xml @ 18:ec25331946b8 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit c0a3a186966888e5787335a7628bf0a4382637e7
author | bgruening |
---|---|
date | Tue, 14 May 2019 18:17:57 -0400 |
parents | 2bbbac61e48d |
children | 0b88494bdcac |
line wrap: on
line diff
--- a/feature_selection.xml Sun Dec 30 01:57:11 2018 -0500 +++ b/feature_selection.xml Tue May 14 18:17:57 2019 -0400 @@ -4,6 +4,7 @@ <import>main_macros.xml</import> </macros> <expand macro="python_requirements"/> + <!--TODO: Add imblearn package support--> <expand macro="macro_stdio"/> <version_command>echo "@VERSION@"</version_command> <command> @@ -17,10 +18,21 @@ <![CDATA[ import json import sklearn.feature_selection +import skrebate +import pandas +import sys +import warnings +import xgboost +from sklearn import ( + cluster, compose, decomposition, ensemble, feature_extraction, + feature_selection, gaussian_process, kernel_approximation, metrics, + model_selection, naive_bayes, neighbors, pipeline, preprocessing, + svm, linear_model, tree, discriminant_analysis) +from imblearn.pipeline import Pipeline as imbPipeline +from sklearn.pipeline import Pipeline -with open('$__tool_directory__/sk_whitelist.json', 'r') as f: - sk_whitelist = json.load(f) -exec(open('$__tool_directory__/utils.py').read(), globals()) +sys.path.insert(0, '$__tool_directory__') +from utils import SafeEval, feature_selector, read_columns warnings.simplefilter('ignore') @@ -30,7 +42,7 @@ with open(input_json_path, 'r') as param_handler: params = json.load(param_handler) -#handle cheetah +## handle cheetah #if $fs_algorithm_selector.selected_algorithm == 'SelectFromModel'\ and $fs_algorithm_selector.model_inputter.input_mode == 'prefitted': params['fs_algorithm_selector']['model_inputter']['fitted_estimator'] =\ @@ -39,18 +51,25 @@ #if $fs_algorithm_selector.selected_algorithm == 'SelectFromModel'\ and $fs_algorithm_selector.model_inputter.input_mode == 'new'\ - and $fs_algorithm_selector.model_inputter.estimator_selector.selected_module == 'customer_estimator': + and $fs_algorithm_selector.model_inputter.estimator_selector.selected_module == 'custom_estimator': params['fs_algorithm_selector']['model_inputter']['estimator_selector']['c_estimator'] =\ '$fs_algorithm_selector.model_inputter.estimator_selector.c_estimator' #end if -#if $fs_algorithm_selector.selected_algorithm in ['RFE', 'RFECV']\ - and $fs_algorithm_selector.estimator_selector.selected_module == 'customer_estimator': +#if $fs_algorithm_selector.selected_algorithm in ['RFE', 'RFECV', 'DyRFECV']\ + and $fs_algorithm_selector.estimator_selector.selected_module == 'custom_estimator': params['fs_algorithm_selector']['estimator_selector']['c_estimator'] =\ '$fs_algorithm_selector.estimator_selector.c_estimator' #end if -# Read features +#if $fs_algorithm_selector.selected_algorithm in ['RFECV', 'DyRFECV']\ + and $fs_algorithm_selector.options.cv_selector.selected_cv\ + in ['GroupKFold', 'GroupShuffleSplit', 'LeaveOneGroupOut', 'LeavePGroupsOut']: +params['fs_algorithm_selector']['options']['cv_selector']['groups_selector']['infile_g'] =\ + '$fs_algorithm_selector.options.cv_selector.groups_selector.infile_g' +#end if + +## Read features features_has_header = params['input_options']['header1'] input_type = params['input_options']['selected_input'] if input_type == 'tabular': @@ -67,12 +86,12 @@ return_df = True, sep='\t', header=header, - parse_dates=True - ) + parse_dates=True) + X = X.astype(float) else: X = mmread('$input_options.infile1') -# Read labels +## Read labels header = 'infer' if params['input_options']['header2'] else None column_option = params['input_options']['column_selector_options_2']['selected_column_selector_option2'] if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: @@ -85,12 +104,11 @@ c_option = column_option, sep='\t', header=header, - parse_dates=True -) -y=y.ravel() + parse_dates=True) +y = y.ravel() -# Create feature selector -new_selector = feature_selector(params['fs_algorithm_selector']) +## Create feature selector +new_selector = feature_selector(params['fs_algorithm_selector'], X=X, y=y) if params['fs_algorithm_selector']['selected_algorithm'] != 'SelectFromModel'\ or params['fs_algorithm_selector']['model_inputter']['input_mode'] != 'prefitted' : new_selector.fit(X, y) @@ -266,6 +284,28 @@ <param name="header2" value="false"/> <output name="outfile" file="feature_selection_result12"/> </test> + <test> + <param name="selected_algorithm" value="RFECV"/> + <param name="input_mode" value="new"/> + <param name="selected_module" value="ensemble"/> + <param name="selected_estimator" value="RandomForestRegressor"/> + <param name="text_params" value="n_estimators=10, random_state=10"/> + <section name="groups_selector"> + <param name="infile_groups" value="regression_y.tabular" ftype="tabular"/> + <param name="header_g" value="true"/> + <param name="selected_column_selector_option_g" value="by_index_number"/> + <param name="col_g" value="1"/> + </section> + <param name="selected_cv" value="GroupShuffleSplit"/> + <param name="random_state" value="0"/> + <param name="infile1" value="regression_X.tabular" ftype="tabular"/> + <param name="header1" value="true"/> + <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/> + <param name="infile2" value="regression_y.tabular" ftype="tabular"/> + <param name="col2" value="1"/> + <param name="header2" value="true"/> + <output name="outfile" file="feature_selection_result13"/> + </test> </tests> <help> <![CDATA[