Mercurial > repos > bgruening > sklearn_data_preprocess
changeset 8:4c7ec23f6cac draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 79fe42239dcf077b13f85cbcd6c6e30d7e1e4832
author | bgruening |
---|---|
date | Tue, 22 May 2018 19:32:12 -0400 |
parents | 3df69602db4d |
children | 595ecc6adb2c |
files | main_macros.xml test-data/feature_selection_result01 test-data/feature_selection_result02 test-data/feature_selection_result03 test-data/feature_selection_result04 test-data/feature_selection_result05 test-data/feature_selection_result06 test-data/feature_selection_result07 test-data/feature_selection_result08 test-data/feature_selection_result09 test-data/feature_selection_result10 |
diffstat | 11 files changed, 139 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- a/main_macros.xml Sat Apr 28 18:09:02 2018 -0400 +++ b/main_macros.xml Tue May 22 19:32:12 2018 -0400 @@ -2,12 +2,17 @@ <token name="@VERSION@">0.9</token> <token name="@COLUMNS_FUNCTION@"> -def read_columns(f, c, **args): +def read_columns(f, c, return_df=False, **args): data = pandas.read_csv(f, **args) cols = c.split (',') cols = map(int, cols) cols = list(map(lambda x: x - 1, cols)) - y = data.iloc[:,cols].values + data = data.iloc[:,cols] + y = data.values + if return_df: + return y, data + else: + return y return y </token> @@ -789,6 +794,128 @@ </when> <yield/> </xml> + <xml name="feature_selection_all"> + <conditional name="feature_selection_algorithms"> + <param name="selected_algorithm" type="select" label="Select a feature selection algorithm"> + <option value="SelectFromModel" selected="true">SelectFromModel - Meta-transformer for selecting features based on importance weights</option> + <option value="GenericUnivariateSelect" selected="true">GenericUnivariateSelect - Univariate feature selector with configurable strategy</option> + <option value="SelectPercentile">SelectPercentile - Select features according to a percentile of the highest scores</option> + <option value="SelectKBest">SelectKBest - Select features according to the k highest scores</option> + <option value="SelectFpr">SelectFpr - Filter: Select the p-values below alpha based on a FPR test</option> + <option value="SelectFdr">SelectFdr - Filter: Select the p-values for an estimated false discovery rate</option> + <option value="SelectFwe">SelectFwe - Filter: Select the p-values corresponding to Family-wise error rate</option> + <option value="RFE">RFE - Feature ranking with recursive feature elimination</option> + <option value="RFECV">RFECV - Feature ranking with recursive feature elimination and cross-validated selection of the best number of features</option> + <option value="VarianceThreshold">VarianceThreshold - Feature selector that removes all low-variance features</option> + <!--option value="chi2">Compute chi-squared stats between each non-negative feature and class</option--> + <!--option value="f_classif">Compute the ANOVA F-value for the provided sample</option--> + <!--option value="f_regression">Univariate linear regression tests</option--> + <!--option value="mutual_info_classif">Estimate mutual information for a discrete target variable</option--> + <!--option value="mutual_info_regression">Estimate mutual information for a continuous target variable</option--> + </param> + <when value="SelectFromModel"> + <expand macro="feature_selection_estimator" /> + <conditional name="extra_estimator"> + <expand macro="feature_selection_extra_estimator" > + <option value="no_load">No, I will load a prefitted estimator</option> + </expand> + <expand macro="feature_selection_estimator_choices" > + <when value="no_load"> + <param name="fitted_estimator" type="data" format='zip' label="Load a prefitted estimator" /> + </when> + </expand> + </conditional> + <section name="options" title="Other Options" expanded="True"> + <param argument="threshold" type="text" value="" optional="true" label="threshold" help="The threshold value to use for feature selection. e.g. 'mean', 'median', '1.25*mean'." /> + <param argument="norm_order" type="integer" value="1" label="norm_order" help="Order of the norm used to filter the vectors of coefficients below threshold in the case where the coef_ attribute of the estimator is of dimension 2. " /> + </section> + </when> + <when value="GenericUnivariateSelect"> + <expand macro="feature_selection_score_function" /> + <section name="options" title="Other Options" expanded="True"> + <param argument="mode" type="select" label="Feature selection mode"> + <option value="percentile">percentile</option> + <option value="k_best">k_best</option> + <option value="fpr">fpr</option> + <option value="fdr">fdr</option> + <option value="fwe">fwe</option> + </param> + <param argument="param" type="float" value="" optional="true" label="Parameter of the corresponding mode" help="float or int depending on the feature selection mode" /> + </section> + </when> + <when value="SelectPercentile"> + <expand macro="feature_selection_score_function" /> + <section name="options" title="Other Options" expanded="True"> + <param argument="percentile" type="integer" value="10" optional="True" label="Percent of features to keep" /> + </section> + </when> + <when value="SelectKBest"> + <expand macro="feature_selection_score_function" /> + <section name="options" title="Other Options" expanded="True"> + <param argument="k" type="integer" value="10" optional="True" label="Number of top features to select" help="No 'all' option is supported." /> + </section> + </when> + <when value="SelectFpr"> + <expand macro="feature_selection_score_function" /> + <section name="options" title="Other Options" expanded="True"> + <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest p-value for features to be kept."/> + </section> + </when> + <when value="SelectFdr"> + <expand macro="feature_selection_score_function" /> + <section name="options" title="Other Options" expanded="True"> + <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/> + </section> + </when> + <when value="SelectFwe"> + <expand macro="feature_selection_score_function" /> + <section name="options" title="Other Options" expanded="True"> + <param argument="alpha" type="float" value="" optional="True" label="Alpha" help="The highest uncorrected p-value for features to keep."/> + </section> + </when> + <when value="RFE"> + <expand macro="feature_selection_estimator" /> + <conditional name="extra_estimator"> + <expand macro="feature_selection_extra_estimator" /> + <expand macro="feature_selection_estimator_choices" /> + </conditional> + <section name="options" title="Other Options" expanded="True"> + <param argument="n_features_to_select" type="integer" value="" optional="true" label="n_features_to_select" help="The number of features to select. If None, half of the features are selected." /> + <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " /> + <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." /> + </section> + </when> + <when value="RFECV"> + <expand macro="feature_selection_estimator" /> + <conditional name="extra_estimator"> + <expand macro="feature_selection_extra_estimator" /> + <expand macro="feature_selection_estimator_choices" /> + </conditional> + <section name="options" title="Other Options" expanded="True"> + <param argument="step" type="float" value="1" label="step" optional="true" help="Default = 1. " /> + <param argument="cv" type="integer" value="" optional="true" label="cv" help="Determines the cross-validation splitting strategy" /> + <param argument="scoring" type="text" value="" optional="true" label="scoring" help="A string (see model evaluation documentation) or a scorer callable object / function with signature scorer(estimator, X, y)."/> + <param argument="verbose" type="integer" value="0" label="verbose" help="Controls verbosity of output." /> + <param argument="n_jobs" type="integer" value="1" label="n_jobs" help="Number of cores to run in parallel while fitting across folds. Defaults to 1 core."/> + </section> + </when> + <when value="VarianceThreshold"> + <section name="options" title="Options" expanded="True"> + <param argument="threshold" type="float" value="" optional="True" label="Threshold" help="Features with a training-set variance lower than this threshold will be removed."/> + </section> + </when> + <!--when value="chi2"> + </when> + <when value="f_classif"> + </when> + <when value="f_regression"> + </when> + <when value="mutual_info_classif"> + </when> + <when value="mutual_info_regression"> + </when--> + </conditional> + </xml> <xml name="feature_selection_score_function"> <param argument="score_func" type="select" label="Select a score function"> <option value="chi2">chi2 - Compute chi-squared stats between each non-negative feature and class</option>
--- a/test-data/feature_selection_result01 Sat Apr 28 18:09:02 2018 -0400 +++ b/test-data/feature_selection_result01 Tue May 22 19:32:12 2018 -0400 @@ -1,4 +1,4 @@ -0 1 +temp_1 average 69.0 69.7 59.0 58.1 88.0 77.3
--- a/test-data/feature_selection_result02 Sat Apr 28 18:09:02 2018 -0400 +++ b/test-data/feature_selection_result02 Tue May 22 19:32:12 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 +temp_2 temp_1 forecast_noaa friend 68.0 69.0 65.0 88.0 60.0 59.0 57.0 66.0 85.0 88.0 75.0 70.0
--- a/test-data/feature_selection_result03 Sat Apr 28 18:09:02 2018 -0400 +++ b/test-data/feature_selection_result03 Tue May 22 19:32:12 2018 -0400 @@ -1,4 +1,4 @@ -0 1 +temp_1 friend 69.0 88.0 59.0 66.0 88.0 70.0
--- a/test-data/feature_selection_result04 Sat Apr 28 18:09:02 2018 -0400 +++ b/test-data/feature_selection_result04 Tue May 22 19:32:12 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 8 9 +month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend week_Mon 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 1.0 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 0.0 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0 0.0
--- a/test-data/feature_selection_result05 Sat Apr 28 18:09:02 2018 -0400 +++ b/test-data/feature_selection_result05 Tue May 22 19:32:12 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 8 +month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0
--- a/test-data/feature_selection_result06 Sat Apr 28 18:09:02 2018 -0400 +++ b/test-data/feature_selection_result06 Tue May 22 19:32:12 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 8 +month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0
--- a/test-data/feature_selection_result07 Sat Apr 28 18:09:02 2018 -0400 +++ b/test-data/feature_selection_result07 Tue May 22 19:32:12 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 8 +month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0
--- a/test-data/feature_selection_result08 Sat Apr 28 18:09:02 2018 -0400 +++ b/test-data/feature_selection_result08 Tue May 22 19:32:12 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 +day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0
--- a/test-data/feature_selection_result09 Sat Apr 28 18:09:02 2018 -0400 +++ b/test-data/feature_selection_result09 Tue May 22 19:32:12 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 8 9 10 11 12 13 +month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend week_Fri week_Mon week_Sat week_Sun week_Tues 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 0.0 1.0 0.0 0.0 0.0 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 0.0 0.0 0.0 0.0 0.0 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0 0.0 0.0 1.0 0.0 0.0
--- a/test-data/feature_selection_result10 Sat Apr 28 18:09:02 2018 -0400 +++ b/test-data/feature_selection_result10 Tue May 22 19:32:12 2018 -0400 @@ -1,4 +1,4 @@ -0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 +month day temp_2 temp_1 average forecast_noaa forecast_acc forecast_under friend week_Fri week_Mon week_Sat week_Sun week_Thurs week_Tues week_Wed 9.0 19.0 68.0 69.0 69.7 65.0 74.0 71.0 88.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 4.0 14.0 60.0 59.0 58.1 57.0 63.0 58.0 66.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 7.0 30.0 85.0 88.0 77.3 75.0 79.0 77.0 70.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0