Mercurial > repos > bgruening > sklearn_build_pipeline
comparison pipeline.xml @ 10:775b004b7920 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
author | bgruening |
---|---|
date | Fri, 09 Aug 2019 07:18:27 -0400 |
parents | 913ee94945f3 |
children | 3f3c6dc38f3e |
comparison
equal
deleted
inserted
replaced
9:019bd8289224 | 10:775b004b7920 |
---|---|
15 <inputs name="inputs" /> | 15 <inputs name="inputs" /> |
16 <configfile name="sklearn_pipeline_script"> | 16 <configfile name="sklearn_pipeline_script"> |
17 <![CDATA[ | 17 <![CDATA[ |
18 import imblearn | 18 import imblearn |
19 import json | 19 import json |
20 import pandas as pd | |
20 import pickle | 21 import pickle |
21 import pprint | 22 import pprint |
22 import skrebate | 23 import skrebate |
23 import sys | 24 import sys |
24 import warnings | 25 import warnings |
25 from mlxtend import classifier, regressor | |
26 from sklearn import ( | 26 from sklearn import ( |
27 cluster, compose, decomposition, ensemble, feature_extraction, | 27 cluster, compose, decomposition, ensemble, feature_extraction, |
28 feature_selection, gaussian_process, kernel_approximation, metrics, | 28 feature_selection, gaussian_process, kernel_approximation, metrics, |
29 model_selection, naive_bayes, neighbors, pipeline, preprocessing, | 29 model_selection, naive_bayes, neighbors, pipeline, preprocessing, |
30 svm, linear_model, tree, discriminant_analysis) | 30 svm, linear_model, tree, discriminant_analysis) |
31 from sklearn.pipeline import make_pipeline | 31 from sklearn.pipeline import make_pipeline |
32 from imblearn.pipeline import make_pipeline as imb_make_pipeline | 32 from imblearn.pipeline import make_pipeline as imb_make_pipeline |
33 | 33 from galaxy_ml.utils import (SafeEval, feature_selector, get_estimator, |
34 sys.path.insert(0, '$__tool_directory__') | 34 try_get_attr, get_search_params) |
35 | 35 |
36 from utils import SafeEval, feature_selector, get_estimator, try_get_attr | |
37 from preprocessors import Z_RandomOverSampler | |
38 | 36 |
39 N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1)) | 37 N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1)) |
40 | 38 |
41 warnings.filterwarnings('ignore') | 39 warnings.filterwarnings('ignore') |
42 | 40 |
67 sys.exit("The pre-processing component type can't be None " | 65 sys.exit("The pre-processing component type can't be None " |
68 "when the number of components is greater than 1.") | 66 "when the number of components is greater than 1.") |
69 if input_json['component_type'] == 'pre_processor': | 67 if input_json['component_type'] == 'pre_processor': |
70 preprocessor = input_json['pre_processors']['selected_pre_processor'] | 68 preprocessor = input_json['pre_processors']['selected_pre_processor'] |
71 pre_processor_options = input_json['pre_processors']['options'] | 69 pre_processor_options = input_json['pre_processors']['options'] |
70 if 'feature_range' in pre_processor_options: | |
71 feature_range = safe_eval(pre_processor_options['feature_range'].strip()) | |
72 if not feature_range: | |
73 feature_range = (0, 1) | |
74 pre_processor_options['feature_range'] = feature_range | |
72 my_class = getattr(preprocessing, preprocessor) | 75 my_class = getattr(preprocessing, preprocessor) |
73 obj = my_class(**pre_processor_options) | 76 obj = my_class(**pre_processor_options) |
74 elif input_json['component_type'] == 'feature_selection': | 77 elif input_json['component_type'] == 'feature_selection': |
75 obj = feature_selector(input_json['fs_algorithm_selector']) | 78 obj = feature_selector(input_json['fs_algorithm_selector']) |
76 elif input_json['component_type'] == 'decomposition': | 79 elif input_json['component_type'] == 'decomposition': |
108 is_imblearn = True | 111 is_imblearn = True |
109 algorithm = input_json['imblearn_selector']['select_algorithm'] | 112 algorithm = input_json['imblearn_selector']['select_algorithm'] |
110 if algorithm == 'over_sampling.SMOTENC': | 113 if algorithm == 'over_sampling.SMOTENC': |
111 obj = over_sampling.SMOTENC(categorical_features=[]) | 114 obj = over_sampling.SMOTENC(categorical_features=[]) |
112 elif algorithm == 'Z_RandomOverSampler': | 115 elif algorithm == 'Z_RandomOverSampler': |
116 Z_RandomOverSampler = try_get_attr('galaxy_ml.preprocessors', | |
117 'Z_RandomOverSampler') | |
113 obj = Z_RandomOverSampler() | 118 obj = Z_RandomOverSampler() |
114 else: | 119 else: |
115 globals = algorithm.split('.') | 120 globals = algorithm.split('.') |
116 mod, klass = globals[0], globals[1] | 121 mod, klass = globals[0], globals[1] |
117 obj = getattr(getattr(imblearn, mod), klass)() | 122 obj = getattr(getattr(imblearn, mod), klass)() |
118 options = input_json['imblearn_selector']['text_params'].strip() | 123 options = input_json['imblearn_selector']['text_params'].strip() |
119 if options != '': | 124 if options != '': |
120 options = safe_eval( 'dict(' + options + ')' ) | 125 options = safe_eval( 'dict(' + options + ')' ) |
121 obj.set_params(**options) | 126 obj.set_params(**options) |
122 elif input_json['component_type'] == 'IRAPS': | 127 elif input_json['component_type'] == 'IRAPS': |
123 iraps_core = try_get_attr('iraps_classifier','IRAPSCore')() | 128 iraps_core = try_get_attr('galaxy_ml.iraps_classifier','IRAPSCore')() |
124 core_params = input_json['text_params'].strip() | 129 core_params = input_json['text_params'].strip() |
125 if core_params != '': | 130 if core_params != '': |
126 try: | 131 try: |
127 params = safe_eval('dict(' + core_params + ')') | 132 params = safe_eval('dict(' + core_params + ')') |
128 except ValueError: | 133 except ValueError: |
135 options['fc_thres'] = input_json['fc_thres'] | 140 options['fc_thres'] = input_json['fc_thres'] |
136 if input_json['occurrence'] is not None: | 141 if input_json['occurrence'] is not None: |
137 options['occurrence'] = input_json['occurrence'] | 142 options['occurrence'] = input_json['occurrence'] |
138 if input_json['discretize'] is not None: | 143 if input_json['discretize'] is not None: |
139 options['discretize'] = input_json['discretize'] | 144 options['discretize'] = input_json['discretize'] |
140 IRAPSClassifier = try_get_attr('iraps_classifier','IRAPSClassifier') | 145 IRAPSClassifier = try_get_attr('galaxy_ml.iraps_classifier','IRAPSClassifier') |
141 obj = IRAPSClassifier(iraps_core, **options) | 146 obj = IRAPSClassifier(iraps_core, **options) |
147 elif input_json['component_type'] == 'preprocessors': | |
148 encoder_selection = input_json['encoder_selection'] | |
149 encoder_type = encoder_selection.pop('encoder_type') | |
150 klass = try_get_attr('galaxy_ml.preprocessors', encoder_type) | |
151 obj = klass(**encoder_selection) | |
152 | |
142 if 'n_jobs' in obj.get_params(): | 153 if 'n_jobs' in obj.get_params(): |
143 obj.set_params( n_jobs=N_JOBS ) | 154 obj.set_params( n_jobs=N_JOBS ) |
144 return obj, is_imblearn | 155 return obj, is_imblearn |
145 | 156 |
146 has_imblearn = False | 157 has_imblearn = False |
170 #if $output_type == 'Final_Estimator_Builder': | 181 #if $output_type == 'Final_Estimator_Builder': |
171 with open('$outfile', 'wb') as out_handler: | 182 with open('$outfile', 'wb') as out_handler: |
172 final_est = pipeline_steps[-1] | 183 final_est = pipeline_steps[-1] |
173 print(final_est) | 184 print(final_est) |
174 pickle.dump(final_est, out_handler, pickle.HIGHEST_PROTOCOL) | 185 pickle.dump(final_est, out_handler, pickle.HIGHEST_PROTOCOL) |
186 out_obj = final_est | |
175 #else: | 187 #else: |
176 if has_imblearn: | 188 if has_imblearn: |
177 pipeline = imb_make_pipeline(*pipeline_steps) | 189 pipeline = imb_make_pipeline(*pipeline_steps) |
178 else: | 190 else: |
179 pipeline = make_pipeline(*pipeline_steps) | 191 pipeline = make_pipeline(*pipeline_steps) |
180 pprint.pprint(pipeline.named_steps) | 192 pprint.pprint(pipeline.named_steps) |
181 | 193 |
182 with open('$outfile', 'wb') as out_handler: | 194 with open('$outfile', 'wb') as out_handler: |
183 pickle.dump(pipeline, out_handler, pickle.HIGHEST_PROTOCOL) | 195 pickle.dump(pipeline, out_handler, pickle.HIGHEST_PROTOCOL) |
196 out_obj = pipeline | |
197 #end if | |
198 | |
199 #if $get_params | |
200 results = get_search_params(out_obj) | |
201 df = pd.DataFrame(results, columns=['', 'Parameter', 'Value']) | |
202 df.to_csv('$outfile_params', sep='\t', index=False) | |
184 #end if | 203 #end if |
185 ]]> | 204 ]]> |
186 </configfile> | 205 </configfile> |
187 </configfiles> | 206 </configfiles> |
188 <inputs> | 207 <inputs> |
193 <option value="pre_processor">Sklearn Preprocessor</option> | 212 <option value="pre_processor">Sklearn Preprocessor</option> |
194 <option value="feature_selection">Feature Selection</option> | 213 <option value="feature_selection">Feature Selection</option> |
195 <option value="decomposition">Matrix Decomposition</option> | 214 <option value="decomposition">Matrix Decomposition</option> |
196 <option value="kernel_approximation">Kernel Approximation</option> | 215 <option value="kernel_approximation">Kernel Approximation</option> |
197 <option value="FeatureAgglomeration">Agglomerate Features</option> | 216 <option value="FeatureAgglomeration">Agglomerate Features</option> |
198 <option value="skrebate">SK-rebate feature selection</option> | 217 <option value="skrebate">SK-rebate Feature Selection</option> |
199 <option value="imblearn">imbalanced-learn sampling</option> | 218 <option value="imblearn">Imbalanced-learn Sampling</option> |
200 <option value="IRAPS">IRAPS -- feature selector and classifier</option> | 219 <option value="IRAPS">IRAPS -- feature selector and classifier</option> |
220 <option value="preprocessors">Bio-sequence Encoders</option> | |
201 </param> | 221 </param> |
202 <when value="None"/> | 222 <when value="None"/> |
203 <when value="pre_processor"> | 223 <when value="pre_processor"> |
204 <conditional name="pre_processors"> | 224 <conditional name="pre_processors"> |
205 <expand macro="sparse_preprocessors_ext" /> | 225 <expand macro="sparse_preprocessors_ext" /> |
230 help="Default(=blank): n_iter=1000, responsive_thres=-1, resistant_thres=0, random_state=None. No double quotes"/> | 250 help="Default(=blank): n_iter=1000, responsive_thres=-1, resistant_thres=0, random_state=None. No double quotes"/> |
231 <param argument="p_thres" type="float" value="0.001" label="P value threshold" help="Float. default=0.001"/> | 251 <param argument="p_thres" type="float" value="0.001" label="P value threshold" help="Float. default=0.001"/> |
232 <param argument="fc_thres" type="float" value="0.1" label="fold change threshold" help="Float. default=0.1"/> | 252 <param argument="fc_thres" type="float" value="0.1" label="fold change threshold" help="Float. default=0.1"/> |
233 <param argument="occurrence" type="float" value="0.7" label="reservation factor" help="Float. default=0.7"/> | 253 <param argument="occurrence" type="float" value="0.7" label="reservation factor" help="Float. default=0.7"/> |
234 <param argument="discretize" type="float" value="-1" label="The z_score threshold to discretize target value" help="Float. default=-1"/> | 254 <param argument="discretize" type="float" value="-1" label="The z_score threshold to discretize target value" help="Float. default=-1"/> |
255 </when> | |
256 <when value="preprocessors"> | |
257 <expand macro="preprocessors_sequence_encoders"/> | |
235 </when> | 258 </when> |
236 </conditional> | 259 </conditional> |
237 </repeat> | 260 </repeat> |
238 <section name="final_estimator" title="Final Estimator" expanded="true"> | 261 <section name="final_estimator" title="Final Estimator" expanded="true"> |
239 <conditional name="estimator_selector"> | 262 <conditional name="estimator_selector"> |
264 </section> | 287 </section> |
265 <param name="output_type" type="select" label="Output the final estimator instead?"> | 288 <param name="output_type" type="select" label="Output the final estimator instead?"> |
266 <option value="Pipeline_Builder" selected="true">Pipeline</option> | 289 <option value="Pipeline_Builder" selected="true">Pipeline</option> |
267 <option value="Final_Estimator_Builder">Final Estimator</option> | 290 <option value="Final_Estimator_Builder">Final Estimator</option> |
268 </param> | 291 </param> |
292 <param name="get_params" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Output parameters for searchCV?" | |
293 help="Optional. Tunable parameters could be obtained through `estimator_attributes` tool."/> | |
269 </inputs> | 294 </inputs> |
270 <outputs> | 295 <outputs> |
271 <data format="zip" name="outfile" label="${output_type}"/> | 296 <data format="zip" name="outfile" label="${output_type}"/> |
297 <data format="tabular" name="outfile_params" label="get_params for ${output_type}"> | |
298 <filter>get_params</filter> | |
299 </data> | |
272 </outputs> | 300 </outputs> |
273 <tests> | 301 <tests> |
274 <test> | 302 <test> |
275 <repeat name="pipeline_component"> | 303 <repeat name="pipeline_component"> |
276 <conditional name="component_selector"> | 304 <conditional name="component_selector"> |
470 </conditional> | 498 </conditional> |
471 </section> | 499 </section> |
472 <param name="output_type" value="Final_Estimator_Builder"/> | 500 <param name="output_type" value="Final_Estimator_Builder"/> |
473 <output name="outfile" file="pipeline15" compare="sim_size" delta="5"/> | 501 <output name="outfile" file="pipeline15" compare="sim_size" delta="5"/> |
474 </test> | 502 </test> |
503 <test> | |
504 <conditional name="component_selector"> | |
505 <param name="component_type" value="preprocessors"/> | |
506 <conditional name="encoder_selection"> | |
507 <param name="encoder_type" value="GenomeOneHotEncoder"/> | |
508 <param name="seq_length" value="1000"/> | |
509 <param name="padding" value="True"/> | |
510 </conditional> | |
511 </conditional> | |
512 <section name="final_estimator"> | |
513 <conditional name="estimator_selector"> | |
514 <param name="selected_module" value="custom_estimator"/> | |
515 <param name="c_estimator" value="keras_model02" ftype="zip"/> | |
516 </conditional> | |
517 </section> | |
518 <output name="outfile" file="pipeline16" compare="sim_size" delta="5"/> | |
519 </test> | |
475 </tests> | 520 </tests> |
476 <help> | 521 <help> |
477 <![CDATA[ | 522 <![CDATA[ |
478 **What it does** | 523 **What it does** |
479 Constructs a pipeline that contains a list of transforms and a final estimator. Pipeline assembles several steps | 524 Constructs a pipeline that contains a list of transforms and a final estimator. Pipeline assembles several steps |