sklearn_feature_selection: feature_selection.xml comparison

comparison feature_selection.xml @ 2:2eb90e73f0d5 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 79fe42239dcf077b13f85cbcd6c6e30d7e1e4832

author	bgruening
date	Tue, 22 May 2018 19:31:59 -0400
parents	092199a095dd
children	3a1acc39b39b

comparison

equal deleted inserted replaced

-:58322d3c7bd3
+:2eb90e73f0d5
-<tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@">
+<tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@.1">
 <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description>
 <macros>
 <import>main_macros.xml</import>
 </macros>
 <expand macro="python_requirements"/>
 @COLUMNS_FUNCTION@
 input_json_path = sys.argv[1]
 params = json.load(open(input_json_path, "r"))
+## Read features
+features_has_header = params["input_options"]["header1"]
 input_type = params["input_options"]["selected_input"]
 if input_type=="tabular":
+header = 'infer' if features_has_header else None
 header = 'infer' if params["input_options"]["header1"] else None
-X = read_columns(
+X, input_df = read_columns(
 "$input_options.infile1",
 "$input_options.col1",
+return_df = True,
 sep='\t',
 header=header,
 parse_dates=True
 )
 else:
 X = mmread(open("$input_options.infile1", 'r'))
+## Read labels
 header = 'infer' if params["input_options"]["header2"] else None
 y = read_columns(
 "$input_options.infile2",
 "$input_options.col2",
 sep='\t',
 header=header,
 parse_dates=True
 )
 y=y.ravel()
+## Create feature selector
 selector = params["feature_selection_algorithms"]["selected_algorithm"]
 selector = getattr(sklearn.feature_selection, selector)
 options = params["feature_selection_algorithms"]["options"]
-#if $feature_selection_algorithms.selected_algorithm == 'SelectFromModel':
+if params['feature_selection_algorithms']['selected_algorithm'] == 'SelectFromModel':
 if not options['threshold'] or options['threshold'] == 'None':
 options['threshold'] = None
-#if $feature_selection_algorithms.extra_estimator.has_estimator == 'no_load':
+if 'extra_estimator' in params['feature_selection_algorithms'] and params['feature_selection_algorithms']['extra_estimator']['has_estimator'] == 'no_load':
-fitted_estimator = pickle.load(open("$feature_selection_algorithms.extra_estimator.fitted_estimator", 'r'))
+fitted_estimator = pickle.load(open("params['feature_selection_algorithms']['extra_estimator']['fitted_estimator']", 'r'))
 new_selector = selector(fitted_estimator, prefit=True, **options)
-#else:
+else:
 estimator=params["feature_selection_algorithms"]["estimator"]
 if params["feature_selection_algorithms"]["extra_estimator"]["has_estimator"]=='no':
 estimator=params["feature_selection_algorithms"]["extra_estimator"]["new_estimator"]
 estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'"))
 new_selector = selector(estimator, **options)
 new_selector.fit(X, y)
-#end if
+elif params['feature_selection_algorithms']['selected_algorithm'] in ['RFE', 'RFECV']:
-#elif $feature_selection_algorithms.selected_algorithm in ['RFE', 'RFECV']:
+if 'scoring' in options and (not options['scoring'] or options['scoring'] == 'None'):
-if 'scoring' in options and (not options['scoring'] or options['scoring'] == 'None'):
+options['scoring'] = None
-options['scoring'] = None
+estimator=params["feature_selection_algorithms"]["estimator"]
-estimator=params["feature_selection_algorithms"]["estimator"]
+if params["feature_selection_algorithms"]["extra_estimator"]["has_estimator"]=='no':
-if params["feature_selection_algorithms"]["extra_estimator"]["has_estimator"]=='no':
+estimator=params["feature_selection_algorithms"]["extra_estimator"]["new_estimator"]
-estimator=params["feature_selection_algorithms"]["extra_estimator"]["new_estimator"]
+estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'"))
-estimator=eval(estimator.replace('__dq__', '"').replace("__sq__","'"))
+new_selector = selector(estimator, **options)
-new_selector = selector(estimator, **options)
+new_selector.fit(X, y)
-new_selector.fit(X, y)
+elif params['feature_selection_algorithms']['selected_algorithm'] == "VarianceThreshold":
-#elif $feature_selection_algorithms.selected_algorithm == "VarianceThreshold":
+new_selector = selector(**options)
-new_selector = selector(**options)
+new_selector.fit(X, y)
-new_selector.fit(X, y)
+else:
-#else:
+score_func = params["feature_selection_algorithms"]["score_func"]
-score_func = params["feature_selection_algorithms"]["score_func"]
+score_func = getattr(sklearn.feature_selection, score_func)
-score_func = getattr(sklearn.feature_selection, score_func)
+new_selector = selector(score_func, **options)
-new_selector = selector(score_func, **options)
+new_selector.fit(X, y)
-new_selector.fit(X, y)
-#end if
+## Transform to select features
+selected_names = None
-#if $select_methods.selected_method == "fit_transform":
+if "$select_methods.selected_method" == "fit_transform":
 res = new_selector.transform(X)
+if features_has_header:
-#else:
+selected_names = input_df.columns[new_selector.get_support(indices=True)]
-res = new_selector.get_support(params["select_methods"]["indices"])
+else:
-#end if
+res = new_selector.get_support(params["select_methods"]["indices"])
-res = pandas.DataFrame(res)
+res = pandas.DataFrame(res, columns = selected_names)
 res.to_csv(path_or_buf="$outfile", sep='\t', index=False)
 ]]>
 </configfile>
 </configfiles>
 <inputs>
-<conditional name="feature_selection_algorithms">
+<expand macro="feature_selection_all" />
-<param name="selected_algorithm" type="select" label="Select a feature selection algorithm">
-<option value="SelectFromModel" selected="true">SelectFromModel - Meta-transformer for selecting features based on importance weights</option>
-<option value="GenericUnivariateSelect" selected="true">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option>
-<option value="SelectPercentile">SelectPercentile - Select features according to a percentile of the highest scores</option>
-<option value="SelectKBest">SelectKBest - Select features according to the k highest scores</option>
-<option value="SelectFpr">SelectFpr - Filter: Select the p-values below alpha based on a FPR test</option>
-<option value="SelectFdr">SelectFdr - Filter: Select the p-values for an estimated false discovery rate</option>
-<option value="SelectFwe">SelectFwe - Filter: Select the p-values corresponding to Family-wise error rate</option>
-<option value="RFE">RFE - Feature ranking with recursive feature elimination</option>
-<option value="RFECV">RFECV - Feature ranking with recursive feature elimination and cross-validated selection of the best number of features</option>
-<option value="VarianceThreshold">VarianceThreshold - Feature selector that removes all low-variance features</option>
-<!--option value="chi2">Compute chi-squared stats between each non-negative feature and class</option-->
-<!--option value="f_classif">Compute the ANOVA F-value for the provided sample</option-->
-<!--option value="f_regression">Univariate linear regression tests</option-->
-<!--option value="mutual_info_classif">Estimate mutual information for a discrete target variable</option-->
-<!--option value="mutual_info_regression">Estimate mutual information for a continuous target variable</option-->
-</param>
-<when value="SelectFromModel">
-<expand macro="feature_selection_estimator" />
-<conditional name="extra_estimator">
-<expand macro="feature_selection_extra_estimator" >
-<option value="no_load">No, I will load a prefitted estimator</option>
-</expand>
-<expand macro="feature_selection_estimator_choices" >
-<when value="no_load">
-<param name="fitted_estimator" type="data" format='zip' label="Load a prefitted estimator" />
-</when>
-</expand>
-</conditional>
-<section name="options" title="Other Options" expanded="True">
-<param argument="threshold" type="text" value="" optional="true" label="threshold" help="The threshold value to use for feature selection. e.g. 'mean', 'median', '1.25*mean'." />
-<param argument="norm_order" type="integer" value="1" label="norm_order" help="Order of the norm used to filter the vectors of coefficients below threshold in the case where the coef_ attribute of the estimator is of dimension 2. " />
-</section>
-</when>
-<when value="GenericUnivariateSelect">
-<expand macro="feature_selection_score_function" />
-<section name="options" title="Other Options" expanded="True">
-<param argument="mode" type="select" label="Feature selection mode">
-<option value="percentile">percentile</option>
-<option value="k_best">k_best</option>
-<option value="fpr">fpr</option>
-<option value="fdr">fdr</option>
-<option value="fwe">fwe</option>
-</param>
-<param argument="param" type="float" value="" optional="true" label="Parameter of the corresponding mode" help="float or int depending on the feature selection mode" />
-</section>
-</when>
-<when value="SelectPercentile">
-<expand macro="feature_selection_score_function" />
-<section name="options" title="Other Options" expanded="True">
-<param argument="percentile" type="integer" value="10" optional="True" label="Percent of features to keep" />
-</section>
-</when>
-<when value="SelectKBest">
-<expand macro="feature_selection_score_function" />
-<section name="options" title="Other Options" expanded="True">
-<param argument="k" type="integer" value="10" optional="True" label="Number of top features to select" help="No 'all' option is supported." />
-</section>
-</when>
-<when value="SelectFpr">
-<expand macro="feature_selection_score_function" />
-<section name="options" title="Other Options" expanded="True">
-<param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest p-value for features to be kept."/>
-</section>
-</when>
-<when value="SelectFdr">
-<expand macro="feature_selection_score_function" />
-<section name="options" title="Other Options" expanded="True">
-<param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/>
-</section>
-</when>
-<when value="SelectFwe">
-<expand macro="feature_selection_score_function" />
-<section name="options" title="Other Options" expanded="True">
-<param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/>
-</section>
-</when>
-<when value="RFE">
-<expand macro="feature_selection_estimator" />
-<conditional name="extra_estimator">
-<expand macro="feature_selection_extra_estimator" />
-<expand macro="feature_selection_estimator_choices" />
-</conditional>
-<section name="options" title="Other Options" expanded="True">
-<param argument="n_features_to_select" type="integer" value="" optional="true" label="n_features_to_select" help="The number of features to select. If None, half of the features are selected." />
-<param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
-<param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
-</section>
-</when>
-<when value="RFECV">
-<expand macro="feature_selection_estimator" />
-<conditional name="extra_estimator">
-<expand macro="feature_selection_extra_estimator" />
-<expand macro="feature_selection_estimator_choices" />
-</conditional>
-<section name="options" title="Other Options" expanded="True">
-<param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " />
-<param argument="cv" type="integer" value="" optional="true" label="cv" help="Determines the cross-validation splitting strategy" />
-<param argument="scoring" type="text" value="" optional="true" label="scoring" help="A string (see model evaluation documentation) or a scorer callable object / function with signature scorer(estimator, X, y)."/>
-<param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." />
-<param argument="n_jobs" type="integer" value="1" label="n_jobs" help="Number of cores to run in parallel while fitting across folds. Defaults to 1 core."/>
-</section>
-</when>
-<when value="VarianceThreshold">
-<section name="options" title="Options" expanded="True">
-<param argument="threshold" type="float" value="" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/>
-</section>
-</when>
-<!--when value="chi2">
-</when>
-<when value="f_classif">
-</when>
-<when value="f_regression">
-</when>
-<when value="mutual_info_classif">
-</when>
-<when value="mutual_info_regression">
-</when-->
-</conditional>
 <expand macro="feature_selection_methods" />
 <expand macro="sl_mixed_input"/>
 </inputs>
 <outputs>
-<data format="txt" name="outfile"/>
+<data format="tabular" name="outfile"/>
 </outputs>
 <tests>
 <test>
 <param name="selected_algorithm" value="SelectFromModel"/>
 <param name="has_estimator" value="no"/>

Mercurial > repos > bgruening > sklearn_feature_selection

comparison feature_selection.xml @ 2:2eb90e73f0d5 draft