Mercurial > repos > bgruening > sklearn_feature_selection
comparison feature_selection.xml @ 20:0b88494bdcac draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
author | bgruening |
---|---|
date | Fri, 09 Aug 2019 07:25:16 -0400 |
parents | ec25331946b8 |
children | c2cd3219543a |
comparison legend: equal, deleted, inserted, replaced
19:231e9a9849e8 | 20:0b88494bdcac |
---|---|
1 <tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@.1"> | 1 <tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@"> |
2 <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description> | 2 <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description> |
3 <macros> | 3 <macros> |
4 <import>main_macros.xml</import> | 4 <import>main_macros.xml</import> |
5 </macros> | 5 </macros> |
6 <expand macro="python_requirements"/> | 6 <expand macro="python_requirements"/> |
29 model_selection, naive_bayes, neighbors, pipeline, preprocessing, | 29 model_selection, naive_bayes, neighbors, pipeline, preprocessing, |
30 svm, linear_model, tree, discriminant_analysis) | 30 svm, linear_model, tree, discriminant_analysis) |
31 from imblearn.pipeline import Pipeline as imbPipeline | 31 from imblearn.pipeline import Pipeline as imbPipeline |
32 from sklearn.pipeline import Pipeline | 32 from sklearn.pipeline import Pipeline |
33 | 33 |
34 sys.path.insert(0, '$__tool_directory__') | 34 from galaxy_ml.utils import (SafeEval, feature_selector, |
35 from utils import SafeEval, feature_selector, read_columns | 35 read_columns, get_module) |
36 | |
36 | 37 |
37 warnings.simplefilter('ignore') | 38 warnings.simplefilter('ignore') |
38 | 39 |
39 safe_eval = SafeEval() | 40 safe_eval = SafeEval() |
40 | 41 |
69 '$fs_algorithm_selector.options.cv_selector.groups_selector.infile_g' | 70 '$fs_algorithm_selector.options.cv_selector.groups_selector.infile_g' |
70 #end if | 71 #end if |
71 | 72 |
72 ## Read features | 73 ## Read features |
73 features_has_header = params['input_options']['header1'] | 74 features_has_header = params['input_options']['header1'] |
74 input_type = params['input_options']['selected_input'] | 75 #if $input_options.selected_input == 'tabular' |
75 if input_type == 'tabular': | 76 header = 'infer' if features_has_header else None |
76 header = 'infer' if features_has_header else None | 77 column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option'] |
77 column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option'] | 78 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: |
78 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: | 79 c = params['input_options']['column_selector_options_1']['col1'] |
79 c = params['input_options']['column_selector_options_1']['col1'] | |
80 else: | |
81 c = None | |
82 X, input_df = read_columns( | |
83 '$input_options.infile1', | |
84 c = c, | |
85 c_option = column_option, | |
86 return_df = True, | |
87 sep='\t', | |
88 header=header, | |
89 parse_dates=True) | |
90 X = X.astype(float) | |
91 else: | 80 else: |
92 X = mmread('$input_options.infile1') | 81 c = None |
82 X, input_df = read_columns( | |
83 '$input_options.infile1', | |
84 c = c, | |
85 c_option = column_option, | |
86 return_df = True, | |
87 sep='\t', | |
88 header=header, | |
89 parse_dates=True) | |
90 X = X.astype(float) | |
91 #elif $input_options.selected_input == 'seq_fasta' | |
92 fasta_file = '$input_options.fasta_file' | |
93 pyfaidx = get_module('pyfaidx') | |
94 sequences = pyfaidx.Fasta(fasta_file) | |
95 n_seqs = len(sequences.keys()) | |
96 X = np.arange(n_seqs)[:, np.newaxis] | |
97 for param in estimator_params.keys(): | |
98 if param.endswith('fasta_path'): | |
99 estimator.set_params( | |
100 **{param: fasta_file}) | |
101 else: | |
102 raise ValueError( | |
103 "The selected estimator doesn't support " | |
104 "fasta file input! Please consider using " | |
105 "KerasGBatchClassifier with " | |
106 "FastaDNABatchGenerator/FastaProteinBatchGenerator " | |
107 "or having GenomeOneHotEncoder/ProteinOneHotEncoder " | |
108 "in pipeline!") | |
109 #elif $input_options.selected_input == 'sparse' | |
110 X = mmread('$input_options.infile1') | |
111 #end if | |
93 | 112 |
94 ## Read labels | 113 ## Read labels |
95 header = 'infer' if params['input_options']['header2'] else None | 114 header = 'infer' if params['input_options']['header2'] else None |
96 column_option = params['input_options']['column_selector_options_2']['selected_column_selector_option2'] | 115 column_option = params['input_options']['column_selector_options_2']['selected_column_selector_option2'] |
97 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: | 116 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: |