comparison feature_selection.xml @ 20:0b88494bdcac draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
author bgruening
date Fri, 09 Aug 2019 07:25:16 -0400
parents ec25331946b8
children c2cd3219543a
comparison
equal deleted inserted replaced
19:231e9a9849e8 20:0b88494bdcac
1 <tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@.1"> 1 <tool id="sklearn_feature_selection" name="Feature Selection" version="@VERSION@">
2 <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description> 2 <description>module, including univariate filter selection methods and recursive feature elimination algorithm</description>
3 <macros> 3 <macros>
4 <import>main_macros.xml</import> 4 <import>main_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="python_requirements"/> 6 <expand macro="python_requirements"/>
29 model_selection, naive_bayes, neighbors, pipeline, preprocessing, 29 model_selection, naive_bayes, neighbors, pipeline, preprocessing,
30 svm, linear_model, tree, discriminant_analysis) 30 svm, linear_model, tree, discriminant_analysis)
31 from imblearn.pipeline import Pipeline as imbPipeline 31 from imblearn.pipeline import Pipeline as imbPipeline
32 from sklearn.pipeline import Pipeline 32 from sklearn.pipeline import Pipeline
33 33
34 sys.path.insert(0, '$__tool_directory__') 34 from galaxy_ml.utils import (SafeEval, feature_selector,
35 from utils import SafeEval, feature_selector, read_columns 35 read_columns, get_module)
36
36 37
37 warnings.simplefilter('ignore') 38 warnings.simplefilter('ignore')
38 39
39 safe_eval = SafeEval() 40 safe_eval = SafeEval()
40 41
69 '$fs_algorithm_selector.options.cv_selector.groups_selector.infile_g' 70 '$fs_algorithm_selector.options.cv_selector.groups_selector.infile_g'
70 #end if 71 #end if
71 72
72 ## Read features 73 ## Read features
73 features_has_header = params['input_options']['header1'] 74 features_has_header = params['input_options']['header1']
74 input_type = params['input_options']['selected_input'] 75 #if $input_options.selected_input == 'tabular'
75 if input_type == 'tabular': 76 header = 'infer' if features_has_header else None
76 header = 'infer' if features_has_header else None 77 column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option']
77 column_option = params['input_options']['column_selector_options_1']['selected_column_selector_option'] 78 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']:
78 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: 79 c = params['input_options']['column_selector_options_1']['col1']
79 c = params['input_options']['column_selector_options_1']['col1']
80 else:
81 c = None
82 X, input_df = read_columns(
83 '$input_options.infile1',
84 c = c,
85 c_option = column_option,
86 return_df = True,
87 sep='\t',
88 header=header,
89 parse_dates=True)
90 X = X.astype(float)
91 else: 80 else:
92 X = mmread('$input_options.infile1') 81 c = None
82 X, input_df = read_columns(
83 '$input_options.infile1',
84 c = c,
85 c_option = column_option,
86 return_df = True,
87 sep='\t',
88 header=header,
89 parse_dates=True)
90 X = X.astype(float)
91 #elif $input_options.selected_input == 'seq_fasta'
92 fasta_file = '$input_options.fasta_file'
93 pyfaidx = get_module('pyfaidx')
94 sequences = pyfaidx.Fasta(fasta_file)
95 n_seqs = len(sequences.keys())
96 X = np.arange(n_seqs)[:, np.newaxis]
97 for param in estimator_params.keys():
98 if param.endswith('fasta_path'):
99 estimator.set_params(
100 **{param: fasta_file})
101 else:
102 raise ValueError(
103 "The selected estimator doesn't support "
104 "fasta file input! Please consider using "
105 "KerasGBatchClassifier with "
106 "FastaDNABatchGenerator/FastaProteinBatchGenerator "
107 "or having GenomeOneHotEncoder/ProteinOneHotEncoder "
108 "in pipeline!")
109 #elif $input_options.selected_input == 'sparse'
110 X = mmread('$input_options.infile1')
111 #end if
93 112
94 ## Read labels 113 ## Read labels
95 header = 'infer' if params['input_options']['header2'] else None 114 header = 'infer' if params['input_options']['header2'] else None
96 column_option = params['input_options']['column_selector_options_2']['selected_column_selector_option2'] 115 column_option = params['input_options']['column_selector_options_2']['selected_column_selector_option2']
97 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: 116 if column_option in ['by_index_number', 'all_but_by_index_number', 'by_header_name', 'all_but_by_header_name']: