Mercurial > repos > bgruening > sklearn_feature_selection
diff feature_selection.xml @ 20:0b88494bdcac draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
author | bgruening |
---|---|
date | Fri, 09 Aug 2019 07:25:16 -0400 |
parents | ec25331946b8 |
children | c2cd3219543a |
line wrap: on
line diff
--- a/feature_selection.xml Tue Jul 09 19:34:06 2019 -0400
+++ b/feature_selection.xml Fri Aug 09 07:25:16 2019 -0400
@@ -1,4 +1,4 @@
-<tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@.1">
+<tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@">
     <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description>
     <macros>
         <import>main_macros.xml</import>
@@ -31,8 +31,9 @@
 from imblearn.pipeline import Pipeline as imbPipeline
 from sklearn.pipeline import Pipeline
 
-sys.path.insert(0, '$__tool_directory__')
-from utils import SafeEval, feature_selector, read_columns
+from galaxy_ml.utils import (SafeEval, feature_selector,
+                             read_columns, get_module)
+
 
 warnings.simplefilter('ignore')
 
@@ -71,25 +72,46 @@
 
 ## Read features
 features_has_header = params['input_options']['header1']
-input_type = params['input_options']['selected_input']
-if input_type == 'tabular':
-    header = 'infer' if features_has_header else None
-    column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option']
-    if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
-        c = params['input_options']['column_selector_options_1']['col1']
-    else:
-        c = None
-    X, input_df = read_columns(
-            '$input_options.infile1',
-            c = c,
-            c_option = column_option,
-            return_df = True,
-            sep='\t',
-            header=header,
-            parse_dates=True)
-    X = X.astype(float)
+#if $input_options.selected_input == 'tabular'
+header = 'infer' if features_has_header else None
+column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option']
+if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
+    c = params['input_options']['column_selector_options_1']['col1']
 else:
-    X = mmread('$input_options.infile1')
+    c = None
+X, input_df = read_columns(
+        '$input_options.infile1',
+        c = c,
+        c_option = column_option,
+        return_df = True,
+        sep='\t',
+        header=header,
+        parse_dates=True)
+X = X.astype(float)
+#elif $input_options.selected_input == 'seq_fasta'
+fasta_file = '$input_options.fasta_file'
+pyfaidx = get_module('pyfaidx')
+sequences = pyfaidx.Fasta(fasta_file)
+n_seqs = len(sequences.keys())
+X = np.arange(n_seqs)[:, np.newaxis]
+## Point the first '*fasta_path' estimator parameter at the fasta file. The
+## loop's else clause runs only when no break happened, i.e. no such parameter.
+for param in estimator_params.keys():
+    if param.endswith('fasta_path'):
+        estimator.set_params(
+            **{param: fasta_file})
+        break
+else:
+    raise ValueError(
+        "The selected estimator doesn't support "
+        "fasta file input! Please consider using "
+        "KerasGBatchClassifier with "
+        "FastaDNABatchGenerator/FastaProteinBatchGenerator "
+        "or having GenomeOneHotEncoder/ProteinOneHotEncoder "
+        "in pipeline!")
+#elif $input_options.selected_input == 'sparse'
+X = mmread('$input_options.infile1')
+#end if
 
 ## Read labels
 header = 'infer' if params['input_options']['header2'] else None